svm.cpp

Go to the documentation of this file.
00001 /*
00002 Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin
00003 All rights reserved.
00004 
00005 Redistribution and use in source and binary forms, with or without
00006 modification, are permitted provided that the following conditions
00007 are met:
00008 
00009 1. Redistributions of source code must retain the above copyright
00010 notice, this list of conditions and the following disclaimer.
00011 
00012 2. Redistributions in binary form must reproduce the above copyright
00013 notice, this list of conditions and the following disclaimer in the
00014 documentation and/or other materials provided with the distribution.
00015 
00016 3. Neither name of copyright holders nor the names of its contributors
00017 may be used to endorse or promote products derived from this software
00018 without specific prior written permission.
00019 
00020 
00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 
00033 */
00034 
00035 
00036 #include <math.h>
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <ctype.h>
00040 #include <float.h>
00041 #include <string.h>
00042 #include <stdarg.h>
00043 #include <limits.h>
00044 #include <locale.h>
00045 #include "svm.h"
00046 
00047 namespace celeste
00048 {
00049 // changes to original libsvm
00050 // added license to header (Tim Nugent)
00051 // added celeste namespace
00052 // switched to C++ header instead a plain C header
00053 
00054 int libsvm_version = LIBSVM_VERSION;
00055 typedef float Qfloat;
00056 typedef signed char schar;
00057 #ifndef min
00058 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00059 #endif
00060 #ifndef max
00061 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00062 #endif
00063 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00064 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00065 {
00066         dst = new T[n];
00067         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00068 }
00069 static inline double powi(double base, int times)
00070 {
00071         double tmp = base, ret = 1.0;
00072 
00073         for(int t=times; t>0; t/=2)
00074         {
00075                 if(t%2==1) ret*=tmp;
00076                 tmp = tmp * tmp;
00077         }
00078         return ret;
00079 }
00080 #define INF HUGE_VAL
00081 #define TAU 1e-12
00082 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00083 
00084 static void print_string_stdout(const char *s)
00085 {
00086         fputs(s,stdout);
00087         fflush(stdout);
00088 }
00089 static void (*svm_print_string) (const char *) = &print_string_stdout;
00090 #if 1
00091 static void info(const char *fmt,...)
00092 {
00093         char buf[BUFSIZ];
00094         va_list ap;
00095         va_start(ap,fmt);
00096         vsprintf(buf,fmt,ap);
00097         va_end(ap);
00098         (*svm_print_string)(buf);
00099 }
00100 #else
00101 static void info(const char *fmt,...) {}
00102 #endif
00103 
00104 //
00105 // Kernel Cache
00106 //
00107 // l is the number of total data items
00108 // size is the cache size limit in bytes
00109 //
00110 class Cache
00111 {
00112 public:
00113         Cache(int l,long int size);
00114         ~Cache();
00115 
00116         // request data [0,len)
00117         // return some position p where [p,len) need to be filled
00118         // (p >= len if nothing needs to be filled)
00119         int get_data(const int index, Qfloat **data, int len);
00120         void swap_index(int i, int j);
00121 private:
00122         int l;
00123         long int size;
00124         struct head_t
00125         {
00126                 head_t *prev, *next;    // a circular list
00127                 Qfloat *data;
00128                 int len;                // data[0,len) is cached in this entry
00129         };
00130 
00131         head_t *head;
00132         head_t lru_head;
00133         void lru_delete(head_t *h);
00134         void lru_insert(head_t *h);
00135 };
00136 
00137 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00138 {
00139         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00140         size /= sizeof(Qfloat);
00141         size -= l * sizeof(head_t) / sizeof(Qfloat);
00142         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00143         lru_head.next = lru_head.prev = &lru_head;
00144 }
00145 
00146 Cache::~Cache()
00147 {
00148         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00149                 free(h->data);
00150         free(head);
00151 }
00152 
00153 void Cache::lru_delete(head_t *h)
00154 {
00155         // delete from current location
00156         h->prev->next = h->next;
00157         h->next->prev = h->prev;
00158 }
00159 
00160 void Cache::lru_insert(head_t *h)
00161 {
00162         // insert to last position
00163         h->next = &lru_head;
00164         h->prev = lru_head.prev;
00165         h->prev->next = h;
00166         h->next->prev = h;
00167 }
00168 
00169 int Cache::get_data(const int index, Qfloat **data, int len)
00170 {
00171         head_t *h = &head[index];
00172         if(h->len) lru_delete(h);
00173         int more = len - h->len;
00174 
00175         if(more > 0)
00176         {
00177                 // free old space
00178                 while(size < more)
00179                 {
00180                         head_t *old = lru_head.next;
00181                         lru_delete(old);
00182                         free(old->data);
00183                         size += old->len;
00184                         old->data = 0;
00185                         old->len = 0;
00186                 }
00187 
00188                 // allocate new space
00189                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00190                 size -= more;
00191                 swap(h->len,len);
00192         }
00193 
00194         lru_insert(h);
00195         *data = h->data;
00196         return len;
00197 }
00198 
00199 void Cache::swap_index(int i, int j)
00200 {
00201         if(i==j) return;
00202 
00203         if(head[i].len) lru_delete(&head[i]);
00204         if(head[j].len) lru_delete(&head[j]);
00205         swap(head[i].data,head[j].data);
00206         swap(head[i].len,head[j].len);
00207         if(head[i].len) lru_insert(&head[i]);
00208         if(head[j].len) lru_insert(&head[j]);
00209 
00210         if(i>j) swap(i,j);
00211         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00212         {
00213                 if(h->len > i)
00214                 {
00215                         if(h->len > j)
00216                                 swap(h->data[i],h->data[j]);
00217                         else
00218                         {
00219                                 // give up
00220                                 lru_delete(h);
00221                                 free(h->data);
00222                                 size += h->len;
00223                                 h->data = 0;
00224                                 h->len = 0;
00225                         }
00226                 }
00227         }
00228 }
00229 
00230 //
00231 // Kernel evaluation
00232 //
00233 // the static method k_function is for doing single kernel evaluation
00234 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00235 // the member function get_Q is for getting one column from the Q Matrix
00236 //
00237 class QMatrix {
00238 public:
00239         virtual Qfloat *get_Q(int column, int len) const = 0;
00240         virtual double *get_QD() const = 0;
00241         virtual void swap_index(int i, int j) const = 0;
00242         virtual ~QMatrix() {}
00243 };
00244 
00245 class Kernel: public QMatrix {
00246 public:
00247         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00248         virtual ~Kernel();
00249 
00250         static double k_function(const svm_node *x, const svm_node *y,
00251                                  const svm_parameter& param);
00252         virtual Qfloat *get_Q(int column, int len) const = 0;
00253         virtual double *get_QD() const = 0;
00254         virtual void swap_index(int i, int j) const     // no so const...
00255         {
00256                 swap(x[i],x[j]);
00257                 if(x_square) swap(x_square[i],x_square[j]);
00258         }
00259 protected:
00260 
00261         double (Kernel::*kernel_function)(int i, int j) const;
00262 
00263 private:
00264         const svm_node **x;
00265         double *x_square;
00266 
00267         // svm_parameter
00268         const int kernel_type;
00269         const int degree;
00270         const double gamma;
00271         const double coef0;
00272 
00273         static double dot(const svm_node *px, const svm_node *py);
00274         double kernel_linear(int i, int j) const
00275         {
00276                 return dot(x[i],x[j]);
00277         }
00278         double kernel_poly(int i, int j) const
00279         {
00280                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00281         }
00282         double kernel_rbf(int i, int j) const
00283         {
00284                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00285         }
00286         double kernel_sigmoid(int i, int j) const
00287         {
00288                 return tanh(gamma*dot(x[i],x[j])+coef0);
00289         }
00290         double kernel_precomputed(int i, int j) const
00291         {
00292                 return x[i][(int)(x[j][0].value)].value;
00293         }
00294 };
00295 
00296 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00297 :kernel_type(param.kernel_type), degree(param.degree),
00298  gamma(param.gamma), coef0(param.coef0)
00299 {
00300         switch(kernel_type)
00301         {
00302                 case LINEAR:
00303                         kernel_function = &Kernel::kernel_linear;
00304                         break;
00305                 case POLY:
00306                         kernel_function = &Kernel::kernel_poly;
00307                         break;
00308                 case RBF:
00309                         kernel_function = &Kernel::kernel_rbf;
00310                         break;
00311                 case SIGMOID:
00312                         kernel_function = &Kernel::kernel_sigmoid;
00313                         break;
00314                 case PRECOMPUTED:
00315                         kernel_function = &Kernel::kernel_precomputed;
00316                         break;
00317         }
00318 
00319         clone(x,x_,l);
00320 
00321         if(kernel_type == RBF)
00322         {
00323                 x_square = new double[l];
00324                 for(int i=0;i<l;i++)
00325                         x_square[i] = dot(x[i],x[i]);
00326         }
00327         else
00328                 x_square = 0;
00329 }
00330 
00331 Kernel::~Kernel()
00332 {
00333         delete[] x;
00334         delete[] x_square;
00335 }
00336 
00337 double Kernel::dot(const svm_node *px, const svm_node *py)
00338 {
00339         double sum = 0;
00340         while(px->index != -1 && py->index != -1)
00341         {
00342                 if(px->index == py->index)
00343                 {
00344                         sum += px->value * py->value;
00345                         ++px;
00346                         ++py;
00347                 }
00348                 else
00349                 {
00350                         if(px->index > py->index)
00351                                 ++py;
00352                         else
00353                                 ++px;
00354                 }                       
00355         }
00356         return sum;
00357 }
00358 
00359 double Kernel::k_function(const svm_node *x, const svm_node *y,
00360                           const svm_parameter& param)
00361 {
00362         switch(param.kernel_type)
00363         {
00364                 case LINEAR:
00365                         return dot(x,y);
00366                 case POLY:
00367                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00368                 case RBF:
00369                 {
00370                         double sum = 0;
00371                         while(x->index != -1 && y->index !=-1)
00372                         {
00373                                 if(x->index == y->index)
00374                                 {
00375                                         double d = x->value - y->value;
00376                                         sum += d*d;
00377                                         ++x;
00378                                         ++y;
00379                                 }
00380                                 else
00381                                 {
00382                                         if(x->index > y->index)
00383                                         {       
00384                                                 sum += y->value * y->value;
00385                                                 ++y;
00386                                         }
00387                                         else
00388                                         {
00389                                                 sum += x->value * x->value;
00390                                                 ++x;
00391                                         }
00392                                 }
00393                         }
00394 
00395                         while(x->index != -1)
00396                         {
00397                                 sum += x->value * x->value;
00398                                 ++x;
00399                         }
00400 
00401                         while(y->index != -1)
00402                         {
00403                                 sum += y->value * y->value;
00404                                 ++y;
00405                         }
00406                         
00407                         return exp(-param.gamma*sum);
00408                 }
00409                 case SIGMOID:
00410                         return tanh(param.gamma*dot(x,y)+param.coef0);
00411                 case PRECOMPUTED:  //x: test (validation), y: SV
00412                         return x[(int)(y->value)].value;
00413                 default:
00414                         return 0;  // Unreachable 
00415         }
00416 }
00417 
00418 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00419 // Solves:
00420 //
00421 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00422 //
00423 //              y^T \alpha = \delta
00424 //              y_i = +1 or -1
00425 //              0 <= alpha_i <= Cp for y_i = 1
00426 //              0 <= alpha_i <= Cn for y_i = -1
00427 //
00428 // Given:
00429 //
00430 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00431 //      l is the size of vectors and matrices
00432 //      eps is the stopping tolerance
00433 //
00434 // solution will be put in \alpha, objective value will be put in obj
00435 //
00436 class Solver {
00437 public:
00438         Solver() {};
00439         virtual ~Solver() {};
00440 
00441         struct SolutionInfo {
00442                 double obj;
00443                 double rho;
00444                 double upper_bound_p;
00445                 double upper_bound_n;
00446                 double r;       // for Solver_NU
00447         };
00448 
00449         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00450                    double *alpha_, double Cp, double Cn, double eps,
00451                    SolutionInfo* si, int shrinking);
00452 protected:
00453         int active_size;
00454         schar *y;
00455         double *G;              // gradient of objective function
00456         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00457         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00458         double *alpha;
00459         const QMatrix *Q;
00460         const double *QD;
00461         double eps;
00462         double Cp,Cn;
00463         double *p;
00464         int *active_set;
00465         double *G_bar;          // gradient, if we treat free variables as 0
00466         int l;
00467         bool unshrink;  // XXX
00468 
00469         double get_C(int i)
00470         {
00471                 return (y[i] > 0)? Cp : Cn;
00472         }
00473         void update_alpha_status(int i)
00474         {
00475                 if(alpha[i] >= get_C(i))
00476                         alpha_status[i] = UPPER_BOUND;
00477                 else if(alpha[i] <= 0)
00478                         alpha_status[i] = LOWER_BOUND;
00479                 else alpha_status[i] = FREE;
00480         }
00481         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00482         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00483         bool is_free(int i) { return alpha_status[i] == FREE; }
00484         void swap_index(int i, int j);
00485         void reconstruct_gradient();
00486         virtual int select_working_set(int &i, int &j);
00487         virtual double calculate_rho();
00488         virtual void do_shrinking();
00489 private:
00490         bool be_shrunk(int i, double Gmax1, double Gmax2);
00491 };
00492 
00493 void Solver::swap_index(int i, int j)
00494 {
00495         Q->swap_index(i,j);
00496         swap(y[i],y[j]);
00497         swap(G[i],G[j]);
00498         swap(alpha_status[i],alpha_status[j]);
00499         swap(alpha[i],alpha[j]);
00500         swap(p[i],p[j]);
00501         swap(active_set[i],active_set[j]);
00502         swap(G_bar[i],G_bar[j]);
00503 }
00504 
00505 void Solver::reconstruct_gradient()
00506 {
00507         // reconstruct inactive elements of G from G_bar and free variables
00508 
00509         if(active_size == l) return;
00510 
00511         int i,j;
00512         int nr_free = 0;
00513 
00514         for(j=active_size;j<l;j++)
00515                 G[j] = G_bar[j] + p[j];
00516 
00517         for(j=0;j<active_size;j++)
00518                 if(is_free(j))
00519                         nr_free++;
00520 
00521         if(2*nr_free < active_size)
00522                 info("\nWARNING: using -h 0 may be faster\n");
00523 
00524         if (nr_free*l > 2*active_size*(l-active_size))
00525         {
00526                 for(i=active_size;i<l;i++)
00527                 {
00528                         const Qfloat *Q_i = Q->get_Q(i,active_size);
00529                         for(j=0;j<active_size;j++)
00530                                 if(is_free(j))
00531                                         G[i] += alpha[j] * Q_i[j];
00532                 }
00533         }
00534         else
00535         {
00536                 for(i=0;i<active_size;i++)
00537                         if(is_free(i))
00538                         {
00539                                 const Qfloat *Q_i = Q->get_Q(i,l);
00540                                 double alpha_i = alpha[i];
00541                                 for(j=active_size;j<l;j++)
00542                                         G[j] += alpha_i * Q_i[j];
00543                         }
00544         }
00545 }
00546 
00547 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00548                    double *alpha_, double Cp, double Cn, double eps,
00549                    SolutionInfo* si, int shrinking)
00550 {
00551         this->l = l;
00552         this->Q = &Q;
00553         QD=Q.get_QD();
00554         clone(p, p_,l);
00555         clone(y, y_,l);
00556         clone(alpha,alpha_,l);
00557         this->Cp = Cp;
00558         this->Cn = Cn;
00559         this->eps = eps;
00560         unshrink = false;
00561 
00562         // initialize alpha_status
00563         {
00564                 alpha_status = new char[l];
00565                 for(int i=0;i<l;i++)
00566                         update_alpha_status(i);
00567         }
00568 
00569         // initialize active set (for shrinking)
00570         {
00571                 active_set = new int[l];
00572                 for(int i=0;i<l;i++)
00573                         active_set[i] = i;
00574                 active_size = l;
00575         }
00576 
00577         // initialize gradient
00578         {
00579                 G = new double[l];
00580                 G_bar = new double[l];
00581                 int i;
00582                 for(i=0;i<l;i++)
00583                 {
00584                         G[i] = p[i];
00585                         G_bar[i] = 0;
00586                 }
00587                 for(i=0;i<l;i++)
00588                         if(!is_lower_bound(i))
00589                         {
00590                                 const Qfloat *Q_i = Q.get_Q(i,l);
00591                                 double alpha_i = alpha[i];
00592                                 int j;
00593                                 for(j=0;j<l;j++)
00594                                         G[j] += alpha_i*Q_i[j];
00595                                 if(is_upper_bound(i))
00596                                         for(j=0;j<l;j++)
00597                                                 G_bar[j] += get_C(i) * Q_i[j];
00598                         }
00599         }
00600 
00601         // optimization step
00602 
00603         int iter = 0;
00604         int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
00605         int counter = min(l,1000)+1;
00606         
00607         while(iter < max_iter)
00608         {
00609                 // show progress and do shrinking
00610 
00611                 if(--counter == 0)
00612                 {
00613                         counter = min(l,1000);
00614                         if(shrinking) do_shrinking();
00615                         info(".");
00616                 }
00617 
00618                 int i,j;
00619                 if(select_working_set(i,j)!=0)
00620                 {
00621                         // reconstruct the whole gradient
00622                         reconstruct_gradient();
00623                         // reset active set size and check
00624                         active_size = l;
00625                         info("*");
00626                         if(select_working_set(i,j)!=0)
00627                                 break;
00628                         else
00629                                 counter = 1;    // do shrinking next iteration
00630                 }
00631                 
00632                 ++iter;
00633 
00634                 // update alpha[i] and alpha[j], handle bounds carefully
00635                 
00636                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00637                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00638 
00639                 double C_i = get_C(i);
00640                 double C_j = get_C(j);
00641 
00642                 double old_alpha_i = alpha[i];
00643                 double old_alpha_j = alpha[j];
00644 
00645                 if(y[i]!=y[j])
00646                 {
00647                         double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00648                         if (quad_coef <= 0)
00649                                 quad_coef = TAU;
00650                         double delta = (-G[i]-G[j])/quad_coef;
00651                         double diff = alpha[i] - alpha[j];
00652                         alpha[i] += delta;
00653                         alpha[j] += delta;
00654                         
00655                         if(diff > 0)
00656                         {
00657                                 if(alpha[j] < 0)
00658                                 {
00659                                         alpha[j] = 0;
00660                                         alpha[i] = diff;
00661                                 }
00662                         }
00663                         else
00664                         {
00665                                 if(alpha[i] < 0)
00666                                 {
00667                                         alpha[i] = 0;
00668                                         alpha[j] = -diff;
00669                                 }
00670                         }
00671                         if(diff > C_i - C_j)
00672                         {
00673                                 if(alpha[i] > C_i)
00674                                 {
00675                                         alpha[i] = C_i;
00676                                         alpha[j] = C_i - diff;
00677                                 }
00678                         }
00679                         else
00680                         {
00681                                 if(alpha[j] > C_j)
00682                                 {
00683                                         alpha[j] = C_j;
00684                                         alpha[i] = C_j + diff;
00685                                 }
00686                         }
00687                 }
00688                 else
00689                 {
00690                         double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00691                         if (quad_coef <= 0)
00692                                 quad_coef = TAU;
00693                         double delta = (G[i]-G[j])/quad_coef;
00694                         double sum = alpha[i] + alpha[j];
00695                         alpha[i] -= delta;
00696                         alpha[j] += delta;
00697 
00698                         if(sum > C_i)
00699                         {
00700                                 if(alpha[i] > C_i)
00701                                 {
00702                                         alpha[i] = C_i;
00703                                         alpha[j] = sum - C_i;
00704                                 }
00705                         }
00706                         else
00707                         {
00708                                 if(alpha[j] < 0)
00709                                 {
00710                                         alpha[j] = 0;
00711                                         alpha[i] = sum;
00712                                 }
00713                         }
00714                         if(sum > C_j)
00715                         {
00716                                 if(alpha[j] > C_j)
00717                                 {
00718                                         alpha[j] = C_j;
00719                                         alpha[i] = sum - C_j;
00720                                 }
00721                         }
00722                         else
00723                         {
00724                                 if(alpha[i] < 0)
00725                                 {
00726                                         alpha[i] = 0;
00727                                         alpha[j] = sum;
00728                                 }
00729                         }
00730                 }
00731 
00732                 // update G
00733 
00734                 double delta_alpha_i = alpha[i] - old_alpha_i;
00735                 double delta_alpha_j = alpha[j] - old_alpha_j;
00736                 
00737                 for(int k=0;k<active_size;k++)
00738                 {
00739                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00740                 }
00741 
00742                 // update alpha_status and G_bar
00743 
00744                 {
00745                         bool ui = is_upper_bound(i);
00746                         bool uj = is_upper_bound(j);
00747                         update_alpha_status(i);
00748                         update_alpha_status(j);
00749                         int k;
00750                         if(ui != is_upper_bound(i))
00751                         {
00752                                 Q_i = Q.get_Q(i,l);
00753                                 if(ui)
00754                                         for(k=0;k<l;k++)
00755                                                 G_bar[k] -= C_i * Q_i[k];
00756                                 else
00757                                         for(k=0;k<l;k++)
00758                                                 G_bar[k] += C_i * Q_i[k];
00759                         }
00760 
00761                         if(uj != is_upper_bound(j))
00762                         {
00763                                 Q_j = Q.get_Q(j,l);
00764                                 if(uj)
00765                                         for(k=0;k<l;k++)
00766                                                 G_bar[k] -= C_j * Q_j[k];
00767                                 else
00768                                         for(k=0;k<l;k++)
00769                                                 G_bar[k] += C_j * Q_j[k];
00770                         }
00771                 }
00772         }
00773 
00774         if(iter >= max_iter)
00775         {
00776                 if(active_size < l)
00777                 {
00778                         // reconstruct the whole gradient to calculate objective value
00779                         reconstruct_gradient();
00780                         active_size = l;
00781                         info("*");
00782                 }
00783                 fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
00784         }
00785 
00786         // calculate rho
00787 
00788         si->rho = calculate_rho();
00789 
00790         // calculate objective value
00791         {
00792                 double v = 0;
00793                 int i;
00794                 for(i=0;i<l;i++)
00795                         v += alpha[i] * (G[i] + p[i]);
00796 
00797                 si->obj = v/2;
00798         }
00799 
00800         // put back the solution
00801         {
00802                 for(int i=0;i<l;i++)
00803                         alpha_[active_set[i]] = alpha[i];
00804         }
00805 
00806         // juggle everything back
00807         /*{
00808                 for(int i=0;i<l;i++)
00809                         while(active_set[i] != i)
00810                                 swap_index(i,active_set[i]);
00811                                 // or Q.swap_index(i,active_set[i]);
00812         }*/
00813 
00814         si->upper_bound_p = Cp;
00815         si->upper_bound_n = Cn;
00816 
00817         info("\noptimization finished, #iter = %d\n",iter);
00818 
00819         delete[] p;
00820         delete[] y;
00821         delete[] alpha;
00822         delete[] alpha_status;
00823         delete[] active_set;
00824         delete[] G;
00825         delete[] G_bar;
00826 }
00827 
00828 // return 1 if already optimal, return 0 otherwise
00829 int Solver::select_working_set(int &out_i, int &out_j)
00830 {
00831         // return i,j such that
00832         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00833         // j: minimizes the decrease of obj value
00834         //    (if quadratic coefficeint <= 0, replace it with tau)
00835         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00836         
00837         double Gmax = -INF;
00838         double Gmax2 = -INF;
00839         int Gmax_idx = -1;
00840         int Gmin_idx = -1;
00841         double obj_diff_min = INF;
00842 
00843         for(int t=0;t<active_size;t++)
00844                 if(y[t]==+1)    
00845                 {
00846                         if(!is_upper_bound(t))
00847                                 if(-G[t] >= Gmax)
00848                                 {
00849                                         Gmax = -G[t];
00850                                         Gmax_idx = t;
00851                                 }
00852                 }
00853                 else
00854                 {
00855                         if(!is_lower_bound(t))
00856                                 if(G[t] >= Gmax)
00857                                 {
00858                                         Gmax = G[t];
00859                                         Gmax_idx = t;
00860                                 }
00861                 }
00862 
00863         int i = Gmax_idx;
00864         const Qfloat *Q_i = NULL;
00865         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00866                 Q_i = Q->get_Q(i,active_size);
00867 
00868         for(int j=0;j<active_size;j++)
00869         {
00870                 if(y[j]==+1)
00871                 {
00872                         if (!is_lower_bound(j))
00873                         {
00874                                 double grad_diff=Gmax+G[j];
00875                                 if (G[j] >= Gmax2)
00876                                         Gmax2 = G[j];
00877                                 if (grad_diff > 0)
00878                                 {
00879                                         double obj_diff;
00880                                         double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00881                                         if (quad_coef > 0)
00882                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00883                                         else
00884                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00885 
00886                                         if (obj_diff <= obj_diff_min)
00887                                         {
00888                                                 Gmin_idx=j;
00889                                                 obj_diff_min = obj_diff;
00890                                         }
00891                                 }
00892                         }
00893                 }
00894                 else
00895                 {
00896                         if (!is_upper_bound(j))
00897                         {
00898                                 double grad_diff= Gmax-G[j];
00899                                 if (-G[j] >= Gmax2)
00900                                         Gmax2 = -G[j];
00901                                 if (grad_diff > 0)
00902                                 {
00903                                         double obj_diff;
00904                                         double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00905                                         if (quad_coef > 0)
00906                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00907                                         else
00908                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00909 
00910                                         if (obj_diff <= obj_diff_min)
00911                                         {
00912                                                 Gmin_idx=j;
00913                                                 obj_diff_min = obj_diff;
00914                                         }
00915                                 }
00916                         }
00917                 }
00918         }
00919 
00920         if(Gmax+Gmax2 < eps)
00921                 return 1;
00922 
00923         out_i = Gmax_idx;
00924         out_j = Gmin_idx;
00925         return 0;
00926 }
00927 
00928 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00929 {
00930         if(is_upper_bound(i))
00931         {
00932                 if(y[i]==+1)
00933                         return(-G[i] > Gmax1);
00934                 else
00935                         return(-G[i] > Gmax2);
00936         }
00937         else if(is_lower_bound(i))
00938         {
00939                 if(y[i]==+1)
00940                         return(G[i] > Gmax2);
00941                 else    
00942                         return(G[i] > Gmax1);
00943         }
00944         else
00945                 return(false);
00946 }
00947 
00948 void Solver::do_shrinking()
00949 {
00950         int i;
00951         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00952         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00953 
00954         // find maximal violating pair first
00955         for(i=0;i<active_size;i++)
00956         {
00957                 if(y[i]==+1)    
00958                 {
00959                         if(!is_upper_bound(i))  
00960                         {
00961                                 if(-G[i] >= Gmax1)
00962                                         Gmax1 = -G[i];
00963                         }
00964                         if(!is_lower_bound(i))  
00965                         {
00966                                 if(G[i] >= Gmax2)
00967                                         Gmax2 = G[i];
00968                         }
00969                 }
00970                 else    
00971                 {
00972                         if(!is_upper_bound(i))  
00973                         {
00974                                 if(-G[i] >= Gmax2)
00975                                         Gmax2 = -G[i];
00976                         }
00977                         if(!is_lower_bound(i))  
00978                         {
00979                                 if(G[i] >= Gmax1)
00980                                         Gmax1 = G[i];
00981                         }
00982                 }
00983         }
00984 
00985         if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
00986         {
00987                 unshrink = true;
00988                 reconstruct_gradient();
00989                 active_size = l;
00990                 info("*");
00991         }
00992 
00993         for(i=0;i<active_size;i++)
00994                 if (be_shrunk(i, Gmax1, Gmax2))
00995                 {
00996                         active_size--;
00997                         while (active_size > i)
00998                         {
00999                                 if (!be_shrunk(active_size, Gmax1, Gmax2))
01000                                 {
01001                                         swap_index(i,active_size);
01002                                         break;
01003                                 }
01004                                 active_size--;
01005                         }
01006                 }
01007 }
01008 
01009 double Solver::calculate_rho()
01010 {
01011         double r;
01012         int nr_free = 0;
01013         double ub = INF, lb = -INF, sum_free = 0;
01014         for(int i=0;i<active_size;i++)
01015         {
01016                 double yG = y[i]*G[i];
01017 
01018                 if(is_upper_bound(i))
01019                 {
01020                         if(y[i]==-1)
01021                                 ub = min(ub,yG);
01022                         else
01023                                 lb = max(lb,yG);
01024                 }
01025                 else if(is_lower_bound(i))
01026                 {
01027                         if(y[i]==+1)
01028                                 ub = min(ub,yG);
01029                         else
01030                                 lb = max(lb,yG);
01031                 }
01032                 else
01033                 {
01034                         ++nr_free;
01035                         sum_free += yG;
01036                 }
01037         }
01038 
01039         if(nr_free>0)
01040                 r = sum_free/nr_free;
01041         else
01042                 r = (ub+lb)/2;
01043 
01044         return r;
01045 }
01046 
01047 //
01048 // Solver for nu-svm classification and regression
01049 //
01050 // additional constraint: e^T \alpha = constant
01051 //
01052 class Solver_NU: public Solver
01053 {
01054 public:
01055         Solver_NU() {}
01056         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01057                    double *alpha, double Cp, double Cn, double eps,
01058                    SolutionInfo* si, int shrinking)
01059         {
01060                 this->si = si;
01061                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01062         }
01063 private:
01064         SolutionInfo *si;
01065         int select_working_set(int &i, int &j);
01066         double calculate_rho();
01067         bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01068         void do_shrinking();
01069 };
01070 
01071 // return 1 if already optimal, return 0 otherwise
01072 int Solver_NU::select_working_set(int &out_i, int &out_j)
01073 {
01074         // return i,j such that y_i = y_j and
01075         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01076         // j: minimizes the decrease of obj value
01077         //    (if quadratic coefficeint <= 0, replace it with tau)
01078         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01079 
01080         double Gmaxp = -INF;
01081         double Gmaxp2 = -INF;
01082         int Gmaxp_idx = -1;
01083 
01084         double Gmaxn = -INF;
01085         double Gmaxn2 = -INF;
01086         int Gmaxn_idx = -1;
01087 
01088         int Gmin_idx = -1;
01089         double obj_diff_min = INF;
01090 
01091         for(int t=0;t<active_size;t++)
01092                 if(y[t]==+1)
01093                 {
01094                         if(!is_upper_bound(t))
01095                                 if(-G[t] >= Gmaxp)
01096                                 {
01097                                         Gmaxp = -G[t];
01098                                         Gmaxp_idx = t;
01099                                 }
01100                 }
01101                 else
01102                 {
01103                         if(!is_lower_bound(t))
01104                                 if(G[t] >= Gmaxn)
01105                                 {
01106                                         Gmaxn = G[t];
01107                                         Gmaxn_idx = t;
01108                                 }
01109                 }
01110 
01111         int ip = Gmaxp_idx;
01112         int in = Gmaxn_idx;
01113         const Qfloat *Q_ip = NULL;
01114         const Qfloat *Q_in = NULL;
01115         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01116                 Q_ip = Q->get_Q(ip,active_size);
01117         if(in != -1)
01118                 Q_in = Q->get_Q(in,active_size);
01119 
01120         for(int j=0;j<active_size;j++)
01121         {
01122                 if(y[j]==+1)
01123                 {
01124                         if (!is_lower_bound(j)) 
01125                         {
01126                                 double grad_diff=Gmaxp+G[j];
01127                                 if (G[j] >= Gmaxp2)
01128                                         Gmaxp2 = G[j];
01129                                 if (grad_diff > 0)
01130                                 {
01131                                         double obj_diff;
01132                                         double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01133                                         if (quad_coef > 0)
01134                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01135                                         else
01136                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01137 
01138                                         if (obj_diff <= obj_diff_min)
01139                                         {
01140                                                 Gmin_idx=j;
01141                                                 obj_diff_min = obj_diff;
01142                                         }
01143                                 }
01144                         }
01145                 }
01146                 else
01147                 {
01148                         if (!is_upper_bound(j))
01149                         {
01150                                 double grad_diff=Gmaxn-G[j];
01151                                 if (-G[j] >= Gmaxn2)
01152                                         Gmaxn2 = -G[j];
01153                                 if (grad_diff > 0)
01154                                 {
01155                                         double obj_diff;
01156                                         double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01157                                         if (quad_coef > 0)
01158                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01159                                         else
01160                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01161 
01162                                         if (obj_diff <= obj_diff_min)
01163                                         {
01164                                                 Gmin_idx=j;
01165                                                 obj_diff_min = obj_diff;
01166                                         }
01167                                 }
01168                         }
01169                 }
01170         }
01171 
01172         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01173                 return 1;
01174 
01175         if (y[Gmin_idx] == +1)
01176                 out_i = Gmaxp_idx;
01177         else
01178                 out_i = Gmaxn_idx;
01179         out_j = Gmin_idx;
01180 
01181         return 0;
01182 }
01183 
01184 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01185 {
01186         if(is_upper_bound(i))
01187         {
01188                 if(y[i]==+1)
01189                         return(-G[i] > Gmax1);
01190                 else    
01191                         return(-G[i] > Gmax4);
01192         }
01193         else if(is_lower_bound(i))
01194         {
01195                 if(y[i]==+1)
01196                         return(G[i] > Gmax2);
01197                 else    
01198                         return(G[i] > Gmax3);
01199         }
01200         else
01201                 return(false);
01202 }
01203 
01204 void Solver_NU::do_shrinking()
01205 {
01206         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01207         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01208         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01209         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01210 
01211         // find maximal violating pair first
01212         int i;
01213         for(i=0;i<active_size;i++)
01214         {
01215                 if(!is_upper_bound(i))
01216                 {
01217                         if(y[i]==+1)
01218                         {
01219                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01220                         }
01221                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01222                 }
01223                 if(!is_lower_bound(i))
01224                 {
01225                         if(y[i]==+1)
01226                         {       
01227                                 if(G[i] > Gmax2) Gmax2 = G[i];
01228                         }
01229                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01230                 }
01231         }
01232 
01233         if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
01234         {
01235                 unshrink = true;
01236                 reconstruct_gradient();
01237                 active_size = l;
01238         }
01239 
01240         for(i=0;i<active_size;i++)
01241                 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01242                 {
01243                         active_size--;
01244                         while (active_size > i)
01245                         {
01246                                 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01247                                 {
01248                                         swap_index(i,active_size);
01249                                         break;
01250                                 }
01251                                 active_size--;
01252                         }
01253                 }
01254 }
01255 
01256 double Solver_NU::calculate_rho()
01257 {
01258         int nr_free1 = 0,nr_free2 = 0;
01259         double ub1 = INF, ub2 = INF;
01260         double lb1 = -INF, lb2 = -INF;
01261         double sum_free1 = 0, sum_free2 = 0;
01262 
01263         for(int i=0;i<active_size;i++)
01264         {
01265                 if(y[i]==+1)
01266                 {
01267                         if(is_upper_bound(i))
01268                                 lb1 = max(lb1,G[i]);
01269                         else if(is_lower_bound(i))
01270                                 ub1 = min(ub1,G[i]);
01271                         else
01272                         {
01273                                 ++nr_free1;
01274                                 sum_free1 += G[i];
01275                         }
01276                 }
01277                 else
01278                 {
01279                         if(is_upper_bound(i))
01280                                 lb2 = max(lb2,G[i]);
01281                         else if(is_lower_bound(i))
01282                                 ub2 = min(ub2,G[i]);
01283                         else
01284                         {
01285                                 ++nr_free2;
01286                                 sum_free2 += G[i];
01287                         }
01288                 }
01289         }
01290 
01291         double r1,r2;
01292         if(nr_free1 > 0)
01293                 r1 = sum_free1/nr_free1;
01294         else
01295                 r1 = (ub1+lb1)/2;
01296         
01297         if(nr_free2 > 0)
01298                 r2 = sum_free2/nr_free2;
01299         else
01300                 r2 = (ub2+lb2)/2;
01301         
01302         si->r = (r1+r2)/2;
01303         return (r1-r2)/2;
01304 }
01305 
01306 //
01307 // Q matrices for various formulations
01308 //
01309 class SVC_Q: public Kernel
01310 { 
01311 public:
01312         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01313         :Kernel(prob.l, prob.x, param)
01314         {
01315                 clone(y,y_,prob.l);
01316                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01317                 QD = new double[prob.l];
01318                 for(int i=0;i<prob.l;i++)
01319                         QD[i] = (this->*kernel_function)(i,i);
01320         }
01321         
01322         Qfloat *get_Q(int i, int len) const
01323         {
01324                 Qfloat *data;
01325                 int start;
01326                 if((start = cache->get_data(i,&data,len)) < len)
01327                 {
01328                         for(int j=start;j<len;j++)
01329                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01330                 }
01331                 return data;
01332         }
01333 
01334         double *get_QD() const
01335         {
01336                 return QD;
01337         }
01338 
01339         void swap_index(int i, int j) const
01340         {
01341                 cache->swap_index(i,j);
01342                 Kernel::swap_index(i,j);
01343                 swap(y[i],y[j]);
01344                 swap(QD[i],QD[j]);
01345         }
01346 
01347         ~SVC_Q()
01348         {
01349                 delete[] y;
01350                 delete cache;
01351                 delete[] QD;
01352         }
01353 private:
01354         schar *y;
01355         Cache *cache;
01356         double *QD;
01357 };
01358 
01359 class ONE_CLASS_Q: public Kernel
01360 {
01361 public:
01362         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01363         :Kernel(prob.l, prob.x, param)
01364         {
01365                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01366                 QD = new double[prob.l];
01367                 for(int i=0;i<prob.l;i++)
01368                         QD[i] = (this->*kernel_function)(i,i);
01369         }
01370         
01371         Qfloat *get_Q(int i, int len) const
01372         {
01373                 Qfloat *data;
01374                 int start;
01375                 if((start = cache->get_data(i,&data,len)) < len)
01376                 {
01377                         for(int j=start;j<len;j++)
01378                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01379                 }
01380                 return data;
01381         }
01382 
01383         double *get_QD() const
01384         {
01385                 return QD;
01386         }
01387 
01388         void swap_index(int i, int j) const
01389         {
01390                 cache->swap_index(i,j);
01391                 Kernel::swap_index(i,j);
01392                 swap(QD[i],QD[j]);
01393         }
01394 
01395         ~ONE_CLASS_Q()
01396         {
01397                 delete cache;
01398                 delete[] QD;
01399         }
01400 private:
01401         Cache *cache;
01402         double *QD;
01403 };
01404 
01405 class SVR_Q: public Kernel
01406 { 
01407 public:
01408         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01409         :Kernel(prob.l, prob.x, param)
01410         {
01411                 l = prob.l;
01412                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01413                 QD = new double[2*l];
01414                 sign = new schar[2*l];
01415                 index = new int[2*l];
01416                 for(int k=0;k<l;k++)
01417                 {
01418                         sign[k] = 1;
01419                         sign[k+l] = -1;
01420                         index[k] = k;
01421                         index[k+l] = k;
01422                         QD[k] = (this->*kernel_function)(k,k);
01423                         QD[k+l] = QD[k];
01424                 }
01425                 buffer[0] = new Qfloat[2*l];
01426                 buffer[1] = new Qfloat[2*l];
01427                 next_buffer = 0;
01428         }
01429 
01430         void swap_index(int i, int j) const
01431         {
01432                 swap(sign[i],sign[j]);
01433                 swap(index[i],index[j]);
01434                 swap(QD[i],QD[j]);
01435         }
01436         
01437         Qfloat *get_Q(int i, int len) const
01438         {
01439                 Qfloat *data;
01440                 int j, real_i = index[i];
01441                 if(cache->get_data(real_i,&data,l) < l)
01442                 {
01443                         for(j=0;j<l;j++)
01444                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01445                 }
01446 
01447                 // reorder and copy
01448                 Qfloat *buf = buffer[next_buffer];
01449                 next_buffer = 1 - next_buffer;
01450                 schar si = sign[i];
01451                 for(j=0;j<len;j++)
01452                         buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01453                 return buf;
01454         }
01455 
01456         double *get_QD() const
01457         {
01458                 return QD;
01459         }
01460 
01461         ~SVR_Q()
01462         {
01463                 delete cache;
01464                 delete[] sign;
01465                 delete[] index;
01466                 delete[] buffer[0];
01467                 delete[] buffer[1];
01468                 delete[] QD;
01469         }
01470 private:
01471         int l;
01472         Cache *cache;
01473         schar *sign;
01474         int *index;
01475         mutable int next_buffer;
01476         Qfloat *buffer[2];
01477         double *QD;
01478 };
01479 
01480 //
01481 // construct and solve various formulations
01482 //
01483 static void solve_c_svc(
01484         const svm_problem *prob, const svm_parameter* param,
01485         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01486 {
01487         int l = prob->l;
01488         double *minus_ones = new double[l];
01489         schar *y = new schar[l];
01490 
01491         int i;
01492 
01493         for(i=0;i<l;i++)
01494         {
01495                 alpha[i] = 0;
01496                 minus_ones[i] = -1;
01497                 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01498         }
01499 
01500         Solver s;
01501         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01502                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01503 
01504         double sum_alpha=0;
01505         for(i=0;i<l;i++)
01506                 sum_alpha += alpha[i];
01507 
01508         if (Cp==Cn)
01509                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01510 
01511         for(i=0;i<l;i++)
01512                 alpha[i] *= y[i];
01513 
01514         delete[] minus_ones;
01515         delete[] y;
01516 }
01517 
01518 static void solve_nu_svc(
01519         const svm_problem *prob, const svm_parameter *param,
01520         double *alpha, Solver::SolutionInfo* si)
01521 {
01522         int i;
01523         int l = prob->l;
01524         double nu = param->nu;
01525 
01526         schar *y = new schar[l];
01527 
01528         for(i=0;i<l;i++)
01529                 if(prob->y[i]>0)
01530                         y[i] = +1;
01531                 else
01532                         y[i] = -1;
01533 
01534         double sum_pos = nu*l/2;
01535         double sum_neg = nu*l/2;
01536 
01537         for(i=0;i<l;i++)
01538                 if(y[i] == +1)
01539                 {
01540                         alpha[i] = min(1.0,sum_pos);
01541                         sum_pos -= alpha[i];
01542                 }
01543                 else
01544                 {
01545                         alpha[i] = min(1.0,sum_neg);
01546                         sum_neg -= alpha[i];
01547                 }
01548 
01549         double *zeros = new double[l];
01550 
01551         for(i=0;i<l;i++)
01552                 zeros[i] = 0;
01553 
01554         Solver_NU s;
01555         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01556                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01557         double r = si->r;
01558 
01559         info("C = %f\n",1/r);
01560 
01561         for(i=0;i<l;i++)
01562                 alpha[i] *= y[i]/r;
01563 
01564         si->rho /= r;
01565         si->obj /= (r*r);
01566         si->upper_bound_p = 1/r;
01567         si->upper_bound_n = 1/r;
01568 
01569         delete[] y;
01570         delete[] zeros;
01571 }
01572 
01573 static void solve_one_class(
01574         const svm_problem *prob, const svm_parameter *param,
01575         double *alpha, Solver::SolutionInfo* si)
01576 {
01577         int l = prob->l;
01578         double *zeros = new double[l];
01579         schar *ones = new schar[l];
01580         int i;
01581 
01582         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01583 
01584         for(i=0;i<n;i++)
01585                 alpha[i] = 1;
01586         if(n<prob->l)
01587                 alpha[n] = param->nu * prob->l - n;
01588         for(i=n+1;i<l;i++)
01589                 alpha[i] = 0;
01590 
01591         for(i=0;i<l;i++)
01592         {
01593                 zeros[i] = 0;
01594                 ones[i] = 1;
01595         }
01596 
01597         Solver s;
01598         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01599                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01600 
01601         delete[] zeros;
01602         delete[] ones;
01603 }
01604 
01605 static void solve_epsilon_svr(
01606         const svm_problem *prob, const svm_parameter *param,
01607         double *alpha, Solver::SolutionInfo* si)
01608 {
01609         int l = prob->l;
01610         double *alpha2 = new double[2*l];
01611         double *linear_term = new double[2*l];
01612         schar *y = new schar[2*l];
01613         int i;
01614 
01615         for(i=0;i<l;i++)
01616         {
01617                 alpha2[i] = 0;
01618                 linear_term[i] = param->p - prob->y[i];
01619                 y[i] = 1;
01620 
01621                 alpha2[i+l] = 0;
01622                 linear_term[i+l] = param->p + prob->y[i];
01623                 y[i+l] = -1;
01624         }
01625 
01626         Solver s;
01627         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01628                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01629 
01630         double sum_alpha = 0;
01631         for(i=0;i<l;i++)
01632         {
01633                 alpha[i] = alpha2[i] - alpha2[i+l];
01634                 sum_alpha += fabs(alpha[i]);
01635         }
01636         info("nu = %f\n",sum_alpha/(param->C*l));
01637 
01638         delete[] alpha2;
01639         delete[] linear_term;
01640         delete[] y;
01641 }
01642 
01643 static void solve_nu_svr(
01644         const svm_problem *prob, const svm_parameter *param,
01645         double *alpha, Solver::SolutionInfo* si)
01646 {
01647         int l = prob->l;
01648         double C = param->C;
01649         double *alpha2 = new double[2*l];
01650         double *linear_term = new double[2*l];
01651         schar *y = new schar[2*l];
01652         int i;
01653 
01654         double sum = C * param->nu * l / 2;
01655         for(i=0;i<l;i++)
01656         {
01657                 alpha2[i] = alpha2[i+l] = min(sum,C);
01658                 sum -= alpha2[i];
01659 
01660                 linear_term[i] = - prob->y[i];
01661                 y[i] = 1;
01662 
01663                 linear_term[i+l] = prob->y[i];
01664                 y[i+l] = -1;
01665         }
01666 
01667         Solver_NU s;
01668         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01669                 alpha2, C, C, param->eps, si, param->shrinking);
01670 
01671         info("epsilon = %f\n",-si->r);
01672 
01673         for(i=0;i<l;i++)
01674                 alpha[i] = alpha2[i] - alpha2[i+l];
01675 
01676         delete[] alpha2;
01677         delete[] linear_term;
01678         delete[] y;
01679 }
01680 
01681 //
01682 // decision_function
01683 //
01684 struct decision_function
01685 {
01686         double *alpha;
01687         double rho;
01688 };
01689 
01690 static decision_function svm_train_one(
01691         const svm_problem *prob, const svm_parameter *param,
01692         double Cp, double Cn)
01693 {
01694         double *alpha = Malloc(double,prob->l);
01695         Solver::SolutionInfo si;
01696         switch(param->svm_type)
01697         {
01698                 case C_SVC:
01699                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01700                         break;
01701                 case NU_SVC:
01702                         solve_nu_svc(prob,param,alpha,&si);
01703                         break;
01704                 case ONE_CLASS:
01705                         solve_one_class(prob,param,alpha,&si);
01706                         break;
01707                 case EPSILON_SVR:
01708                         solve_epsilon_svr(prob,param,alpha,&si);
01709                         break;
01710                 case NU_SVR:
01711                         solve_nu_svr(prob,param,alpha,&si);
01712                         break;
01713         }
01714 
01715         info("obj = %f, rho = %f\n",si.obj,si.rho);
01716 
01717         // output SVs
01718 
01719         int nSV = 0;
01720         int nBSV = 0;
01721         for(int i=0;i<prob->l;i++)
01722         {
01723                 if(fabs(alpha[i]) > 0)
01724                 {
01725                         ++nSV;
01726                         if(prob->y[i] > 0)
01727                         {
01728                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01729                                         ++nBSV;
01730                         }
01731                         else
01732                         {
01733                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01734                                         ++nBSV;
01735                         }
01736                 }
01737         }
01738 
01739         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01740 
01741         decision_function f;
01742         f.alpha = alpha;
01743         f.rho = si.rho;
01744         return f;
01745 }
01746 
01747 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01748 static void sigmoid_train(
01749         int l, const double *dec_values, const double *labels, 
01750         double& A, double& B)
01751 {
01752         double prior1=0, prior0 = 0;
01753         int i;
01754 
01755         for (i=0;i<l;i++)
01756                 if (labels[i] > 0) prior1+=1;
01757                 else prior0+=1;
01758         
01759         int max_iter=100;       // Maximal number of iterations
01760         double min_step=1e-10;  // Minimal step taken in line search
01761         double sigma=1e-12;     // For numerically strict PD of Hessian
01762         double eps=1e-5;
01763         double hiTarget=(prior1+1.0)/(prior1+2.0);
01764         double loTarget=1/(prior0+2.0);
01765         double *t=Malloc(double,l);
01766         double fApB,p,q;
01767         double newA,newB,newf,d1,d2;
01768         int iter;
01769         
01770         // Initial Point and Initial Fun Value
01771         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01772         double fval = 0.0;
01773 
01774         for (i=0;i<l;i++)
01775         {
01776                 if (labels[i]>0) t[i]=hiTarget;
01777                 else t[i]=loTarget;
01778                 fApB = dec_values[i]*A+B;
01779                 if (fApB>=0)
01780                         fval += t[i]*fApB + log1p(exp(-fApB));
01781                 else
01782                         fval += (t[i] - 1)*fApB +log1p(exp(fApB));
01783         }
01784         for (iter=0;iter<max_iter;iter++)
01785         {
01786                 // Update Gradient and Hessian (use H' = H + sigma I)
01787                 double h11=sigma; // numerically ensures strict PD
01788                 double h22=sigma;
01789                 double h21=0.0;
01790         double g1=0.0;
01791         double g2=0.0;
01792                 for (i=0;i<l;i++)
01793                 {
01794                         fApB = dec_values[i]*A+B;
01795                         if (fApB >= 0)
01796                         {
01797                                 p=exp(-fApB)/(1.0+exp(-fApB));
01798                                 q=1.0/(1.0+exp(-fApB));
01799                         }
01800                         else
01801                         {
01802                                 p=1.0/(1.0+exp(fApB));
01803                                 q=exp(fApB)/(1.0+exp(fApB));
01804                         }
01805                         d2=p*q;
01806                         h11+=dec_values[i]*dec_values[i]*d2;
01807                         h22+=d2;
01808                         h21+=dec_values[i]*d2;
01809                         d1=t[i]-p;
01810                         g1+=dec_values[i]*d1;
01811                         g2+=d1;
01812                 }
01813 
01814                 // Stopping Criteria
01815                 if (fabs(g1)<eps && fabs(g2)<eps)
01816                         break;
01817 
01818                 // Finding Newton direction: -inv(H') * g
01819                 const double det=h11*h22-h21*h21;
01820                 const double dA=-(h22*g1 - h21 * g2) / det;
01821                 const double dB=-(-h21*g1+ h11 * g2) / det;
01822                 const double gd=g1*dA+g2*dB;
01823 
01824 
01825                 double stepsize = 1;            // Line Search
01826                 while (stepsize >= min_step)
01827                 {
01828                         newA = A + stepsize * dA;
01829                         newB = B + stepsize * dB;
01830 
01831                         // New function value
01832                         newf = 0.0;
01833                         for (i=0;i<l;i++)
01834                         {
01835                                 fApB = dec_values[i]*newA+newB;
01836                                 if (fApB >= 0)
01837                                         newf += t[i]*fApB + log1p(exp(-fApB));
01838                                 else
01839                                         newf += (t[i] - 1)*fApB +log1p(exp(fApB));
01840                         }
01841                         // Check sufficient decrease
01842                         if (newf<fval+0.0001*stepsize*gd)
01843                         {
01844                                 A=newA;B=newB;fval=newf;
01845                                 break;
01846                         }
01847                         else
01848                                 stepsize = stepsize / 2.0;
01849                 }
01850 
01851                 if (stepsize < min_step)
01852                 {
01853                         info("Line search fails in two-class probability estimates\n");
01854                         break;
01855                 }
01856         }
01857 
01858         if (iter>=max_iter)
01859                 info("Reaching maximal iterations in two-class probability estimates\n");
01860         free(t);
01861 }
01862 
01863 static double sigmoid_predict(double decision_value, double A, double B)
01864 {
01865         double fApB = decision_value*A+B;
01866         // 1-p used later; avoid catastrophic cancellation
01867         if (fApB >= 0)
01868                 return exp(-fApB)/(1.0+exp(-fApB));
01869         else
01870                 return 1.0/(1+exp(fApB)) ;
01871 }
01872 
01873 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01874 static void multiclass_probability(int k, double **r, double *p)
01875 {
01876         int t,j;
01877         int iter = 0, max_iter=max(100,k);
01878         double **Q=Malloc(double *,k);
01879         double *Qp=Malloc(double,k);
01880         double eps=0.005/k;
01881         
01882         for (t=0;t<k;t++)
01883         {
01884                 p[t]=1.0/k;  // Valid if k = 1
01885                 Q[t]=Malloc(double,k);
01886                 Q[t][t]=0;
01887                 for (j=0;j<t;j++)
01888                 {
01889                         Q[t][t]+=r[j][t]*r[j][t];
01890                         Q[t][j]=Q[j][t];
01891                 }
01892                 for (j=t+1;j<k;j++)
01893                 {
01894                         Q[t][t]+=r[j][t]*r[j][t];
01895                         Q[t][j]=-r[j][t]*r[t][j];
01896                 }
01897         }
01898         for (iter=0;iter<max_iter;iter++)
01899         {
01900                 // stopping condition, recalculate QP,pQP for numerical accuracy
01901                 double pQp=0;
01902                 for (t=0;t<k;t++)
01903                 {
01904                         Qp[t]=0;
01905                         for (j=0;j<k;j++)
01906                                 Qp[t]+=Q[t][j]*p[j];
01907                         pQp+=p[t]*Qp[t];
01908                 }
01909                 double max_error=0;
01910                 for (t=0;t<k;t++)
01911                 {
01912                         double error=fabs(Qp[t]-pQp);
01913                         if (error>max_error)
01914                                 max_error=error;
01915                 }
01916                 if (max_error<eps) break;
01917                 
01918                 for (t=0;t<k;t++)
01919                 {
01920                         double diff=(-Qp[t]+pQp)/Q[t][t];
01921                         p[t]+=diff;
01922                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01923                         for (j=0;j<k;j++)
01924                         {
01925                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01926                                 p[j]/=(1+diff);
01927                         }
01928                 }
01929         }
01930         if (iter>=max_iter)
01931                 info("Exceeds max_iter in multiclass_prob\n");
01932         for(t=0;t<k;t++) free(Q[t]);
01933         free(Q);
01934         free(Qp);
01935 }
01936 
01937 // Cross-validation decision values for probability estimates
01938 static void svm_binary_svc_probability(
01939         const svm_problem *prob, const svm_parameter *param,
01940         double Cp, double Cn, double& probA, double& probB)
01941 {
01942         int i;
01943         int nr_fold = 5;
01944         int *perm = Malloc(int,prob->l);
01945         double *dec_values = Malloc(double,prob->l);
01946 
01947         // random shuffle
01948         for(i=0;i<prob->l;i++) perm[i]=i;
01949         for(i=0;i<prob->l;i++)
01950         {
01951                 int j = i+rand()%(prob->l-i);
01952                 swap(perm[i],perm[j]);
01953         }
01954         for(i=0;i<nr_fold;i++)
01955         {
01956                 int begin = i*prob->l/nr_fold;
01957                 int end = (i+1)*prob->l/nr_fold;
01958                 int j,k;
01959                 struct svm_problem subprob;
01960 
01961                 subprob.l = prob->l-(end-begin);
01962                 subprob.x = Malloc(struct svm_node*,subprob.l);
01963                 subprob.y = Malloc(double,subprob.l);
01964                         
01965                 k=0;
01966                 for(j=0;j<begin;j++)
01967                 {
01968                         subprob.x[k] = prob->x[perm[j]];
01969                         subprob.y[k] = prob->y[perm[j]];
01970                         ++k;
01971                 }
01972                 for(j=end;j<prob->l;j++)
01973                 {
01974                         subprob.x[k] = prob->x[perm[j]];
01975                         subprob.y[k] = prob->y[perm[j]];
01976                         ++k;
01977                 }
01978                 int p_count=0,n_count=0;
01979                 for(j=0;j<k;j++)
01980                         if(subprob.y[j]>0)
01981                                 p_count++;
01982                         else
01983                                 n_count++;
01984 
01985                 if(p_count==0 && n_count==0)
01986                         for(j=begin;j<end;j++)
01987                                 dec_values[perm[j]] = 0;
01988                 else if(p_count > 0 && n_count == 0)
01989                         for(j=begin;j<end;j++)
01990                                 dec_values[perm[j]] = 1;
01991                 else if(p_count == 0 && n_count > 0)
01992                         for(j=begin;j<end;j++)
01993                                 dec_values[perm[j]] = -1;
01994                 else
01995                 {
01996                         svm_parameter subparam = *param;
01997                         subparam.probability=0;
01998                         subparam.C=1.0;
01999                         subparam.nr_weight=2;
02000                         subparam.weight_label = Malloc(int,2);
02001                         subparam.weight = Malloc(double,2);
02002                         subparam.weight_label[0]=+1;
02003                         subparam.weight_label[1]=-1;
02004                         subparam.weight[0]=Cp;
02005                         subparam.weight[1]=Cn;
02006                         struct svm_model *submodel = svm_train(&subprob,&subparam);
02007                         for(j=begin;j<end;j++)
02008                         {
02009                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
02010                                 // ensure +1 -1 order; reason not using CV subroutine
02011                                 dec_values[perm[j]] *= submodel->label[0];
02012                         }               
02013                         svm_free_and_destroy_model(&submodel);
02014                         svm_destroy_param(&subparam);
02015                 }
02016                 free(subprob.x);
02017                 free(subprob.y);
02018         }               
02019         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
02020         free(dec_values);
02021         free(perm);
02022 }
02023 
02024 // Return parameter of a Laplace distribution 
02025 static double svm_svr_probability(
02026         const svm_problem *prob, const svm_parameter *param)
02027 {
02028         int i;
02029         int nr_fold = 5;
02030         double *ymv = Malloc(double,prob->l);
02031         double mae = 0;
02032 
02033         svm_parameter newparam = *param;
02034         newparam.probability = 0;
02035         svm_cross_validation(prob,&newparam,nr_fold,ymv);
02036         for(i=0;i<prob->l;i++)
02037         {
02038                 ymv[i]=prob->y[i]-ymv[i];
02039                 mae += fabs(ymv[i]);
02040         }               
02041         mae /= prob->l;
02042         double std=sqrt(2*mae*mae);
02043         int count=0;
02044         mae=0;
02045         for(i=0;i<prob->l;i++)
02046                 if (fabs(ymv[i]) > 5*std) 
02047                         count=count+1;
02048                 else 
02049                         mae+=fabs(ymv[i]);
02050         mae /= (prob->l-count);
02051         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02052         free(ymv);
02053         return mae;
02054 }
02055 
02056 
02057 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02058 // perm, length l, must be allocated before calling this subroutine
02059 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02060 {
02061         int l = prob->l;
02062         int max_nr_class = 16;
02063         int nr_class = 0;
02064         int *label = Malloc(int,max_nr_class);
02065         int *count = Malloc(int,max_nr_class);
02066         int *data_label = Malloc(int,l);
02067         int i;
02068 
02069         for(i=0;i<l;i++)
02070         {
02071                 int this_label = (int)prob->y[i];
02072                 int j;
02073                 for(j=0;j<nr_class;j++)
02074                 {
02075                         if(this_label == label[j])
02076                         {
02077                                 ++count[j];
02078                                 break;
02079                         }
02080                 }
02081                 data_label[i] = j;
02082                 if(j == nr_class)
02083                 {
02084                         if(nr_class == max_nr_class)
02085                         {
02086                                 max_nr_class *= 2;
02087                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02088                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02089                         }
02090                         label[nr_class] = this_label;
02091                         count[nr_class] = 1;
02092                         ++nr_class;
02093                 }
02094         }
02095 
02096         //
02097         // Labels are ordered by their first occurrence in the training set. 
02098         // However, for two-class sets with -1/+1 labels and -1 appears first, 
02099         // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
02100         //
02101         if (nr_class == 2 && label[0] == -1 && label[1] == 1)
02102         {
02103                 swap(label[0],label[1]);
02104                 swap(count[0],count[1]);
02105                 for(i=0;i<l;i++)
02106                 {
02107                         if(data_label[i] == 0)
02108                                 data_label[i] = 1;
02109                         else
02110                                 data_label[i] = 0;
02111                 }
02112         }
02113 
02114         int *start = Malloc(int,nr_class);
02115         start[0] = 0;
02116         for(i=1;i<nr_class;i++)
02117                 start[i] = start[i-1]+count[i-1];
02118         for(i=0;i<l;i++)
02119         {
02120                 perm[start[data_label[i]]] = i;
02121                 ++start[data_label[i]];
02122         }
02123         start[0] = 0;
02124         for(i=1;i<nr_class;i++)
02125                 start[i] = start[i-1]+count[i-1];
02126 
02127         *nr_class_ret = nr_class;
02128         *label_ret = label;
02129         *start_ret = start;
02130         *count_ret = count;
02131         free(data_label);
02132 }
02133 
02134 //
02135 // Interface functions
02136 //
02137 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02138 {
02139         svm_model *model = Malloc(svm_model,1);
02140         model->param = *param;
02141         model->free_sv = 0;     // XXX
02142 
02143         if(param->svm_type == ONE_CLASS ||
02144            param->svm_type == EPSILON_SVR ||
02145            param->svm_type == NU_SVR)
02146         {
02147                 // regression or one-class-svm
02148                 model->nr_class = 2;
02149                 model->label = NULL;
02150                 model->nSV = NULL;
02151                 model->probA = NULL; model->probB = NULL;
02152                 model->sv_coef = Malloc(double *,1);
02153 
02154                 if(param->probability && 
02155                    (param->svm_type == EPSILON_SVR ||
02156                     param->svm_type == NU_SVR))
02157                 {
02158                         model->probA = Malloc(double,1);
02159                         model->probA[0] = svm_svr_probability(prob,param);
02160                 }
02161 
02162                 decision_function f = svm_train_one(prob,param,0,0);
02163                 model->rho = Malloc(double,1);
02164                 model->rho[0] = f.rho;
02165 
02166                 int nSV = 0;
02167                 int i;
02168                 for(i=0;i<prob->l;i++)
02169                         if(fabs(f.alpha[i]) > 0) ++nSV;
02170                 model->l = nSV;
02171                 model->SV = Malloc(svm_node *,nSV);
02172                 model->sv_coef[0] = Malloc(double,nSV);
02173                 model->sv_indices = Malloc(int,nSV);
02174                 int j = 0;
02175                 for(i=0;i<prob->l;i++)
02176                         if(fabs(f.alpha[i]) > 0)
02177                         {
02178                                 model->SV[j] = prob->x[i];
02179                                 model->sv_coef[0][j] = f.alpha[i];
02180                                 model->sv_indices[j] = i+1;
02181                                 ++j;
02182                         }               
02183 
02184                 free(f.alpha);
02185         }
02186         else
02187         {
02188                 // classification
02189                 int l = prob->l;
02190                 int nr_class;
02191                 int *label = NULL;
02192                 int *start = NULL;
02193                 int *count = NULL;
02194                 int *perm = Malloc(int,l);
02195 
02196                 // group training data of the same class
02197                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02198                 if(nr_class == 1) 
02199                         info("WARNING: training data in only one class. See README for details.\n");
02200                 
02201                 svm_node **x = Malloc(svm_node *,l);
02202                 int i;
02203                 for(i=0;i<l;i++)
02204                         x[i] = prob->x[perm[i]];
02205 
02206                 // calculate weighted C
02207 
02208                 double *weighted_C = Malloc(double, nr_class);
02209                 for(i=0;i<nr_class;i++)
02210                         weighted_C[i] = param->C;
02211                 for(i=0;i<param->nr_weight;i++)
02212                 {       
02213                         int j;
02214                         for(j=0;j<nr_class;j++)
02215                                 if(param->weight_label[i] == label[j])
02216                                         break;
02217                         if(j == nr_class)
02218                                 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
02219                         else
02220                                 weighted_C[j] *= param->weight[i];
02221                 }
02222 
02223                 // train k*(k-1)/2 models
02224                 
02225                 bool *nonzero = Malloc(bool,l);
02226                 for(i=0;i<l;i++)
02227                         nonzero[i] = false;
02228                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02229 
02230                 double *probA=NULL,*probB=NULL;
02231                 if (param->probability)
02232                 {
02233                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02234                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02235                 }
02236 
02237                 int p = 0;
02238                 for(i=0;i<nr_class;i++)
02239                         for(int j=i+1;j<nr_class;j++)
02240                         {
02241                                 svm_problem sub_prob;
02242                                 int si = start[i], sj = start[j];
02243                                 int ci = count[i], cj = count[j];
02244                                 sub_prob.l = ci+cj;
02245                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02246                                 sub_prob.y = Malloc(double,sub_prob.l);
02247                                 int k;
02248                                 for(k=0;k<ci;k++)
02249                                 {
02250                                         sub_prob.x[k] = x[si+k];
02251                                         sub_prob.y[k] = +1;
02252                                 }
02253                                 for(k=0;k<cj;k++)
02254                                 {
02255                                         sub_prob.x[ci+k] = x[sj+k];
02256                                         sub_prob.y[ci+k] = -1;
02257                                 }
02258 
02259                                 if(param->probability)
02260                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02261 
02262                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02263                                 for(k=0;k<ci;k++)
02264                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02265                                                 nonzero[si+k] = true;
02266                                 for(k=0;k<cj;k++)
02267                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02268                                                 nonzero[sj+k] = true;
02269                                 free(sub_prob.x);
02270                                 free(sub_prob.y);
02271                                 ++p;
02272                         }
02273 
02274                 // build output
02275 
02276                 model->nr_class = nr_class;
02277                 
02278                 model->label = Malloc(int,nr_class);
02279                 for(i=0;i<nr_class;i++)
02280                         model->label[i] = label[i];
02281                 
02282                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02283                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02284                         model->rho[i] = f[i].rho;
02285 
02286                 if(param->probability)
02287                 {
02288                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02289                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02290                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02291                         {
02292                                 model->probA[i] = probA[i];
02293                                 model->probB[i] = probB[i];
02294                         }
02295                 }
02296                 else
02297                 {
02298                         model->probA=NULL;
02299                         model->probB=NULL;
02300                 }
02301 
02302                 int total_sv = 0;
02303                 int *nz_count = Malloc(int,nr_class);
02304                 model->nSV = Malloc(int,nr_class);
02305                 for(i=0;i<nr_class;i++)
02306                 {
02307                         int nSV = 0;
02308                         for(int j=0;j<count[i];j++)
02309                                 if(nonzero[start[i]+j])
02310                                 {       
02311                                         ++nSV;
02312                                         ++total_sv;
02313                                 }
02314                         model->nSV[i] = nSV;
02315                         nz_count[i] = nSV;
02316                 }
02317                 
02318                 info("Total nSV = %d\n",total_sv);
02319 
02320                 model->l = total_sv;
02321                 model->SV = Malloc(svm_node *,total_sv);
02322                 model->sv_indices = Malloc(int,total_sv);
02323                 p = 0;
02324                 for(i=0;i<l;i++)
02325                         if(nonzero[i])
02326                         {
02327                                 model->SV[p] = x[i];
02328                                 model->sv_indices[p++] = perm[i] + 1;
02329                         }
02330 
02331                 int *nz_start = Malloc(int,nr_class);
02332                 nz_start[0] = 0;
02333                 for(i=1;i<nr_class;i++)
02334                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02335 
02336                 model->sv_coef = Malloc(double *,nr_class-1);
02337                 for(i=0;i<nr_class-1;i++)
02338                         model->sv_coef[i] = Malloc(double,total_sv);
02339 
02340                 p = 0;
02341                 for(i=0;i<nr_class;i++)
02342                         for(int j=i+1;j<nr_class;j++)
02343                         {
02344                                 // classifier (i,j): coefficients with
02345                                 // i are in sv_coef[j-1][nz_start[i]...],
02346                                 // j are in sv_coef[i][nz_start[j]...]
02347 
02348                                 int si = start[i];
02349                                 int sj = start[j];
02350                                 int ci = count[i];
02351                                 int cj = count[j];
02352                                 
02353                                 int q = nz_start[i];
02354                                 int k;
02355                                 for(k=0;k<ci;k++)
02356                                         if(nonzero[si+k])
02357                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02358                                 q = nz_start[j];
02359                                 for(k=0;k<cj;k++)
02360                                         if(nonzero[sj+k])
02361                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02362                                 ++p;
02363                         }
02364                 
02365                 free(label);
02366                 free(probA);
02367                 free(probB);
02368                 free(count);
02369                 free(perm);
02370                 free(start);
02371                 free(x);
02372                 free(weighted_C);
02373                 free(nonzero);
02374                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02375                         free(f[i].alpha);
02376                 free(f);
02377                 free(nz_count);
02378                 free(nz_start);
02379         }
02380         return model;
02381 }
02382 
02383 // Stratified cross validation
02384 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02385 {
02386         int i;
02387         int *fold_start;
02388         int l = prob->l;
02389         int *perm = Malloc(int,l);
02390         int nr_class;
02391         if (nr_fold > l)
02392         {
02393                 nr_fold = l;
02394                 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
02395         }
02396         fold_start = Malloc(int,nr_fold+1);
02397         // stratified cv may not give leave-one-out rate
02398         // Each class to l folds -> some folds may have zero elements
02399         if((param->svm_type == C_SVC ||
02400             param->svm_type == NU_SVC) && nr_fold < l)
02401         {
02402                 int *start = NULL;
02403                 int *label = NULL;
02404                 int *count = NULL;
02405                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02406 
02407                 // random shuffle and then data grouped by fold using the array perm
02408                 int *fold_count = Malloc(int,nr_fold);
02409                 int c;
02410                 int *index = Malloc(int,l);
02411                 for(i=0;i<l;i++)
02412                         index[i]=perm[i];
02413                 for (c=0; c<nr_class; c++) 
02414                         for(i=0;i<count[c];i++)
02415                         {
02416                                 int j = i+rand()%(count[c]-i);
02417                                 swap(index[start[c]+j],index[start[c]+i]);
02418                         }
02419                 for(i=0;i<nr_fold;i++)
02420                 {
02421                         fold_count[i] = 0;
02422                         for (c=0; c<nr_class;c++)
02423                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02424                 }
02425                 fold_start[0]=0;
02426                 for (i=1;i<=nr_fold;i++)
02427                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02428                 for (c=0; c<nr_class;c++)
02429                         for(i=0;i<nr_fold;i++)
02430                         {
02431                                 int begin = start[c]+i*count[c]/nr_fold;
02432                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02433                                 for(int j=begin;j<end;j++)
02434                                 {
02435                                         perm[fold_start[i]] = index[j];
02436                                         fold_start[i]++;
02437                                 }
02438                         }
02439                 fold_start[0]=0;
02440                 for (i=1;i<=nr_fold;i++)
02441                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02442                 free(start);
02443                 free(label);
02444                 free(count);
02445                 free(index);
02446                 free(fold_count);
02447         }
02448         else
02449         {
02450                 for(i=0;i<l;i++) perm[i]=i;
02451                 for(i=0;i<l;i++)
02452                 {
02453                         int j = i+rand()%(l-i);
02454                         swap(perm[i],perm[j]);
02455                 }
02456                 for(i=0;i<=nr_fold;i++)
02457                         fold_start[i]=i*l/nr_fold;
02458         }
02459 
02460         for(i=0;i<nr_fold;i++)
02461         {
02462                 int begin = fold_start[i];
02463                 int end = fold_start[i+1];
02464                 int j,k;
02465                 struct svm_problem subprob;
02466 
02467                 subprob.l = l-(end-begin);
02468                 subprob.x = Malloc(struct svm_node*,subprob.l);
02469                 subprob.y = Malloc(double,subprob.l);
02470                         
02471                 k=0;
02472                 for(j=0;j<begin;j++)
02473                 {
02474                         subprob.x[k] = prob->x[perm[j]];
02475                         subprob.y[k] = prob->y[perm[j]];
02476                         ++k;
02477                 }
02478                 for(j=end;j<l;j++)
02479                 {
02480                         subprob.x[k] = prob->x[perm[j]];
02481                         subprob.y[k] = prob->y[perm[j]];
02482                         ++k;
02483                 }
02484                 struct svm_model *submodel = svm_train(&subprob,param);
02485                 if(param->probability && 
02486                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02487                 {
02488                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02489                         for(j=begin;j<end;j++)
02490                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02491                         free(prob_estimates);
02492                 }
02493                 else
02494                         for(j=begin;j<end;j++)
02495                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02496                 svm_free_and_destroy_model(&submodel);
02497                 free(subprob.x);
02498                 free(subprob.y);
02499         }               
02500         free(fold_start);
02501         free(perm);
02502 }
02503 
02504 
02505 int svm_get_svm_type(const svm_model *model)
02506 {
02507         return model->param.svm_type;
02508 }
02509 
02510 int svm_get_nr_class(const svm_model *model)
02511 {
02512         return model->nr_class;
02513 }
02514 
02515 void svm_get_labels(const svm_model *model, int* label)
02516 {
02517         if (model->label != NULL)
02518                 for(int i=0;i<model->nr_class;i++)
02519                         label[i] = model->label[i];
02520 }
02521 
02522 void svm_get_sv_indices(const svm_model *model, int* indices)
02523 {
02524         if (model->sv_indices != NULL)
02525                 for(int i=0;i<model->l;i++)
02526                         indices[i] = model->sv_indices[i];
02527 }
02528 
02529 int svm_get_nr_sv(const svm_model *model)
02530 {
02531         return model->l;
02532 }
02533 
02534 double svm_get_svr_probability(const svm_model *model)
02535 {
02536         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02537             model->probA!=NULL)
02538                 return model->probA[0];
02539         else
02540         {
02541                 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02542                 return 0;
02543         }
02544 }
02545 
02546 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02547 {
02548         int i;
02549         if(model->param.svm_type == ONE_CLASS ||
02550            model->param.svm_type == EPSILON_SVR ||
02551            model->param.svm_type == NU_SVR)
02552         {
02553                 double *sv_coef = model->sv_coef[0];
02554                 double sum = 0;
02555                 for(i=0;i<model->l;i++)
02556                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02557                 sum -= model->rho[0];
02558                 *dec_values = sum;
02559 
02560                 if(model->param.svm_type == ONE_CLASS)
02561                         return (sum>0)?1:-1;
02562                 else
02563                         return sum;
02564         }
02565         else
02566         {
02567                 int nr_class = model->nr_class;
02568                 int l = model->l;
02569                 
02570                 double *kvalue = Malloc(double,l);
02571                 for(i=0;i<l;i++)
02572                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02573 
02574                 int *start = Malloc(int,nr_class);
02575                 start[0] = 0;
02576                 for(i=1;i<nr_class;i++)
02577                         start[i] = start[i-1]+model->nSV[i-1];
02578 
02579                 int *vote = Malloc(int,nr_class);
02580                 for(i=0;i<nr_class;i++)
02581                         vote[i] = 0;
02582 
02583                 int p=0;
02584                 for(i=0;i<nr_class;i++)
02585                         for(int j=i+1;j<nr_class;j++)
02586                         {
02587                                 double sum = 0;
02588                                 int si = start[i];
02589                                 int sj = start[j];
02590                                 int ci = model->nSV[i];
02591                                 int cj = model->nSV[j];
02592                                 
02593                                 int k;
02594                                 double *coef1 = model->sv_coef[j-1];
02595                                 double *coef2 = model->sv_coef[i];
02596                                 for(k=0;k<ci;k++)
02597                                         sum += coef1[si+k] * kvalue[si+k];
02598                                 for(k=0;k<cj;k++)
02599                                         sum += coef2[sj+k] * kvalue[sj+k];
02600                                 sum -= model->rho[p];
02601                                 dec_values[p] = sum;
02602 
02603                                 if(dec_values[p] > 0)
02604                                         ++vote[i];
02605                                 else
02606                                         ++vote[j];
02607                                 p++;
02608                         }
02609 
02610                 int vote_max_idx = 0;
02611                 for(i=1;i<nr_class;i++)
02612                         if(vote[i] > vote[vote_max_idx])
02613                                 vote_max_idx = i;
02614 
02615                 free(kvalue);
02616                 free(start);
02617                 free(vote);
02618                 return model->label[vote_max_idx];
02619         }
02620 }
02621 
02622 double svm_predict(const svm_model *model, const svm_node *x)
02623 {
02624         int nr_class = model->nr_class;
02625         double *dec_values;
02626         if(model->param.svm_type == ONE_CLASS ||
02627            model->param.svm_type == EPSILON_SVR ||
02628            model->param.svm_type == NU_SVR)
02629                 dec_values = Malloc(double, 1);
02630         else 
02631                 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02632         double pred_result = svm_predict_values(model, x, dec_values);
02633         free(dec_values);
02634         return pred_result;
02635 }
02636 
02637 double svm_predict_probability(
02638         const svm_model *model, const svm_node *x, double *prob_estimates)
02639 {
02640         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02641             model->probA!=NULL && model->probB!=NULL)
02642         {
02643                 int i;
02644                 int nr_class = model->nr_class;
02645                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02646                 svm_predict_values(model, x, dec_values);
02647 
02648                 double min_prob=1e-7;
02649                 double **pairwise_prob=Malloc(double *,nr_class);
02650                 for(i=0;i<nr_class;i++)
02651                         pairwise_prob[i]=Malloc(double,nr_class);
02652                 int k=0;
02653                 for(i=0;i<nr_class;i++)
02654                         for(int j=i+1;j<nr_class;j++)
02655                         {
02656                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02657                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02658                                 k++;
02659                         }
02660                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02661 
02662                 int prob_max_idx = 0;
02663                 for(i=1;i<nr_class;i++)
02664                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02665                                 prob_max_idx = i;
02666                 for(i=0;i<nr_class;i++)
02667                         free(pairwise_prob[i]);
02668                 free(dec_values);
02669                 free(pairwise_prob);
02670                 return model->label[prob_max_idx];
02671         }
02672         else 
02673                 return svm_predict(model, x);
02674 }
02675 
02676 static const char *svm_type_table[] =
02677 {
02678         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02679 };
02680 
02681 static const char *kernel_type_table[]=
02682 {
02683         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02684 };
02685 
02686 int svm_save_model(const char *model_file_name, const svm_model *model)
02687 {
02688         FILE *fp = fopen(model_file_name,"w");
02689         if(fp==NULL) return -1;
02690 
02691         char *old_locale = strdup(setlocale(LC_ALL, NULL));
02692         setlocale(LC_ALL, "C");
02693 
02694         const svm_parameter& param = model->param;
02695 
02696         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02697         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02698 
02699         if(param.kernel_type == POLY)
02700                 fprintf(fp,"degree %d\n", param.degree);
02701 
02702         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02703                 fprintf(fp,"gamma %g\n", param.gamma);
02704 
02705         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02706                 fprintf(fp,"coef0 %g\n", param.coef0);
02707 
02708         int nr_class = model->nr_class;
02709         int l = model->l;
02710         fprintf(fp, "nr_class %d\n", nr_class);
02711         fprintf(fp, "total_sv %d\n",l);
02712         
02713         {
02714                 fprintf(fp, "rho");
02715                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02716                         fprintf(fp," %g",model->rho[i]);
02717                 fprintf(fp, "\n");
02718         }
02719         
02720         if(model->label)
02721         {
02722                 fprintf(fp, "label");
02723                 for(int i=0;i<nr_class;i++)
02724                         fprintf(fp," %d",model->label[i]);
02725                 fprintf(fp, "\n");
02726         }
02727 
02728         if(model->probA) // regression has probA only
02729         {
02730                 fprintf(fp, "probA");
02731                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02732                         fprintf(fp," %g",model->probA[i]);
02733                 fprintf(fp, "\n");
02734         }
02735         if(model->probB)
02736         {
02737                 fprintf(fp, "probB");
02738                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02739                         fprintf(fp," %g",model->probB[i]);
02740                 fprintf(fp, "\n");
02741         }
02742 
02743         if(model->nSV)
02744         {
02745                 fprintf(fp, "nr_sv");
02746                 for(int i=0;i<nr_class;i++)
02747                         fprintf(fp," %d",model->nSV[i]);
02748                 fprintf(fp, "\n");
02749         }
02750 
02751         fprintf(fp, "SV\n");
02752         const double * const *sv_coef = model->sv_coef;
02753         const svm_node * const *SV = model->SV;
02754 
02755         for(int i=0;i<l;i++)
02756         {
02757                 for(int j=0;j<nr_class-1;j++)
02758                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02759 
02760                 const svm_node *p = SV[i];
02761 
02762                 if(param.kernel_type == PRECOMPUTED)
02763                         fprintf(fp,"0:%d ",(int)(p->value));
02764                 else
02765                         while(p->index != -1)
02766                         {
02767                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02768                                 p++;
02769                         }
02770                 fprintf(fp, "\n");
02771         }
02772 
02773         setlocale(LC_ALL, old_locale);
02774         free(old_locale);
02775 
02776         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02777         else return 0;
02778 }
02779 
02780 static char *line = NULL;
02781 static int max_line_len;
02782 
02783 static char* readline(FILE *input)
02784 {
02785         if(fgets(line,max_line_len,input) == NULL)
02786                 return NULL;
02787 
02788         while(strrchr(line,'\n') == NULL)
02789         {
02790                 max_line_len *= 2;
02791                 line = (char *) realloc(line,max_line_len);
02792                 int len = (int) strlen(line);
02793                 if(fgets(line+len,max_line_len-len,input) == NULL)
02794                         break;
02795         }
02796         return line;
02797 }
02798 
02799 //
02800 // FSCANF helps to handle fscanf failures.
02801 // Its do-while block avoids the ambiguity when
02802 // if (...)
02803 //    FSCANF();
02804 // is used
02805 //
02806 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
02807 bool read_model_header(FILE *fp, svm_model* model)
02808 {
02809         svm_parameter& param = model->param;
02810         char cmd[101];
02811         while(1)
02812         {
02813                 FSCANF(fp,"%100s",cmd);
02814 
02815                 if(strcmp(cmd,"svm_type")==0)
02816                 {
02817                         FSCANF(fp,"%100s",cmd);
02818                         int i;
02819                         for(i=0;svm_type_table[i];i++)
02820                         {
02821                                 if(strcmp(svm_type_table[i],cmd)==0)
02822                                 {
02823                                         param.svm_type=i;
02824                                         break;
02825                                 }
02826                         }
02827                         if(svm_type_table[i] == NULL)
02828                         {
02829                                 fprintf(stderr,"unknown svm type.\n");
02830                                 return false;
02831                         }
02832                 }
02833                 else if(strcmp(cmd,"kernel_type")==0)
02834                 {               
02835                         FSCANF(fp,"%100s",cmd);
02836                         int i;
02837                         for(i=0;kernel_type_table[i];i++)
02838                         {
02839                                 if(strcmp(kernel_type_table[i],cmd)==0)
02840                                 {
02841                                         param.kernel_type=i;
02842                                         break;
02843                                 }
02844                         }
02845                         if(kernel_type_table[i] == NULL)
02846                         {
02847                                 fprintf(stderr,"unknown kernel function.\n");   
02848                                 return false;
02849                         }
02850                 }
02851                 else if(strcmp(cmd,"degree")==0)
02852                         FSCANF(fp,"%d",&param.degree);
02853                 else if(strcmp(cmd,"gamma")==0)
02854                         FSCANF(fp,"%lf",&param.gamma);
02855                 else if(strcmp(cmd,"coef0")==0)
02856                         FSCANF(fp,"%lf",&param.coef0);
02857                 else if(strcmp(cmd,"nr_class")==0)
02858                         FSCANF(fp,"%d",&model->nr_class);
02859                 else if(strcmp(cmd,"total_sv")==0)
02860                         FSCANF(fp,"%d",&model->l);
02861                 else if(strcmp(cmd,"rho")==0)
02862                 {
02863                         int n = model->nr_class * (model->nr_class-1)/2;
02864                         model->rho = Malloc(double,n);
02865                         for(int i=0;i<n;i++)
02866                                 FSCANF(fp,"%lf",&model->rho[i]);
02867                 }
02868                 else if(strcmp(cmd,"label")==0)
02869                 {
02870                         int n = model->nr_class;
02871                         model->label = Malloc(int,n);
02872                         for(int i=0;i<n;i++)
02873                                 FSCANF(fp,"%d",&model->label[i]);
02874                 }
02875                 else if(strcmp(cmd,"probA")==0)
02876                 {
02877                         int n = model->nr_class * (model->nr_class-1)/2;
02878                         model->probA = Malloc(double,n);
02879                         for(int i=0;i<n;i++)
02880                                 FSCANF(fp,"%lf",&model->probA[i]);
02881                 }
02882                 else if(strcmp(cmd,"probB")==0)
02883                 {
02884                         int n = model->nr_class * (model->nr_class-1)/2;
02885                         model->probB = Malloc(double,n);
02886                         for(int i=0;i<n;i++)
02887                                 FSCANF(fp,"%lf",&model->probB[i]);
02888                 }
02889                 else if(strcmp(cmd,"nr_sv")==0)
02890                 {
02891                         int n = model->nr_class;
02892                         model->nSV = Malloc(int,n);
02893                         for(int i=0;i<n;i++)
02894                                 FSCANF(fp,"%d",&model->nSV[i]);
02895                 }
02896                 else if(strcmp(cmd,"license")==0)
02897                 {
02898                         FSCANF(fp,"%100s",cmd);
02899                         //std::cout << "License: " << cmd << std::endl;
02900                 
02901                 }
02902                 else if(strcmp(cmd,"SV")==0)
02903                 {
02904                         while(1)
02905                         {
02906                                 int c = getc(fp);
02907                                 if(c==EOF || c=='\n') break;
02908                         }
02909                         break;
02910                 }
02911                 else
02912                 {
02913                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02914                         return false;
02915                 }
02916         }
02917 
02918         return true;
02919 
02920 }
02921 
02922 svm_model *svm_load_model(const char *model_file_name)
02923 {
02924         FILE *fp = fopen(model_file_name,"rb");
02925         if(fp==NULL) return NULL;
02926 
02927         char *old_locale = strdup(setlocale(LC_ALL, NULL));
02928         setlocale(LC_ALL, "C");
02929 
02930         // read parameters
02931 
02932         svm_model *model = Malloc(svm_model,1);
02933         model->rho = NULL;
02934         model->probA = NULL;
02935         model->probB = NULL;
02936         model->sv_indices = NULL;
02937         model->label = NULL;
02938         model->nSV = NULL;
02939         
02940         // read header
02941         if (!read_model_header(fp, model))
02942         {
02943                 fprintf(stderr, "ERROR: fscanf failed to read model\n");
02944                 setlocale(LC_ALL, old_locale);
02945                 free(old_locale);
02946                 free(model->rho);
02947                 free(model->label);
02948                 free(model->nSV);
02949                 free(model);
02950                 return NULL;
02951         }
02952         
02953         // read sv_coef and SV
02954 
02955         int elements = 0;
02956         long pos = ftell(fp);
02957 
02958         max_line_len = 1024;
02959         line = Malloc(char,max_line_len);
02960         char *p,*endptr,*idx,*val;
02961 
02962         while(readline(fp)!=NULL)
02963         {
02964                 p = strtok(line,":");
02965                 while(1)
02966                 {
02967                         p = strtok(NULL,":");
02968                         if(p == NULL)
02969                                 break;
02970                         ++elements;
02971                 }
02972         }
02973         elements += model->l;
02974 
02975         fseek(fp,pos,SEEK_SET);
02976 
02977         int m = model->nr_class - 1;
02978         int l = model->l;
02979         model->sv_coef = Malloc(double *,m);
02980         int i;
02981         for(i=0;i<m;i++)
02982                 model->sv_coef[i] = Malloc(double,l);
02983         model->SV = Malloc(svm_node*,l);
02984         svm_node *x_space = NULL;
02985         if(l>0) x_space = Malloc(svm_node,elements);
02986 
02987         int j=0;
02988         for(i=0;i<l;i++)
02989         {
02990                 readline(fp);
02991                 model->SV[i] = &x_space[j];
02992 
02993                 p = strtok(line, " \t");
02994                 model->sv_coef[0][i] = strtod(p,&endptr);
02995                 for(int k=1;k<m;k++)
02996                 {
02997                         p = strtok(NULL, " \t");
02998                         model->sv_coef[k][i] = strtod(p,&endptr);
02999                 }
03000 
03001                 while(1)
03002                 {
03003                         idx = strtok(NULL, ":");
03004                         val = strtok(NULL, " \t");
03005 
03006                         if(val == NULL)
03007                                 break;
03008                         x_space[j].index = (int) strtol(idx,&endptr,10);
03009                         x_space[j].value = strtod(val,&endptr);
03010 
03011                         ++j;
03012                 }
03013                 x_space[j++].index = -1;
03014         }
03015         free(line);
03016 
03017         setlocale(LC_ALL, old_locale);
03018         free(old_locale);
03019 
03020         if (ferror(fp) != 0 || fclose(fp) != 0)
03021                 return NULL;
03022 
03023         model->free_sv = 1;     // XXX
03024         return model;
03025 }
03026 
03027 void svm_free_model_content(svm_model* model_ptr)
03028 {
03029         if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
03030                 free((void *)(model_ptr->SV[0]));
03031         if(model_ptr->sv_coef)
03032         {
03033                 for(int i=0;i<model_ptr->nr_class-1;i++)
03034                         free(model_ptr->sv_coef[i]);
03035         }
03036 
03037         free(model_ptr->SV);
03038         model_ptr->SV = NULL;
03039 
03040         free(model_ptr->sv_coef);
03041         model_ptr->sv_coef = NULL;
03042 
03043         free(model_ptr->rho);
03044         model_ptr->rho = NULL;
03045 
03046         free(model_ptr->label);
03047         model_ptr->label= NULL;
03048 
03049         free(model_ptr->probA);
03050         model_ptr->probA = NULL;
03051 
03052         free(model_ptr->probB);
03053         model_ptr->probB= NULL;
03054 
03055         free(model_ptr->sv_indices);
03056         model_ptr->sv_indices = NULL;
03057 
03058         free(model_ptr->nSV);
03059         model_ptr->nSV = NULL;
03060 }
03061 
03062 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
03063 {
03064         if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
03065         {
03066                 svm_free_model_content(*model_ptr_ptr);
03067                 free(*model_ptr_ptr);
03068                 *model_ptr_ptr = NULL;
03069         }
03070 }
03071 
03072 void svm_destroy_param(svm_parameter* param)
03073 {
03074         free(param->weight_label);
03075         free(param->weight);
03076 }
03077 
03078 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
03079 {
03080         // svm_type
03081 
03082         int svm_type = param->svm_type;
03083         if(svm_type != C_SVC &&
03084            svm_type != NU_SVC &&
03085            svm_type != ONE_CLASS &&
03086            svm_type != EPSILON_SVR &&
03087            svm_type != NU_SVR)
03088                 return "unknown svm type";
03089         
03090         // kernel_type, degree
03091         
03092         int kernel_type = param->kernel_type;
03093         if(kernel_type != LINEAR &&
03094            kernel_type != POLY &&
03095            kernel_type != RBF &&
03096            kernel_type != SIGMOID &&
03097            kernel_type != PRECOMPUTED)
03098                 return "unknown kernel type";
03099 
03100         if(param->gamma < 0)
03101                 return "gamma < 0";
03102 
03103         if(param->degree < 0)
03104                 return "degree of polynomial kernel < 0";
03105 
03106         // cache_size,eps,C,nu,p,shrinking
03107 
03108         if(param->cache_size <= 0)
03109                 return "cache_size <= 0";
03110 
03111         if(param->eps <= 0)
03112                 return "eps <= 0";
03113 
03114         if(svm_type == C_SVC ||
03115            svm_type == EPSILON_SVR ||
03116            svm_type == NU_SVR)
03117                 if(param->C <= 0)
03118                         return "C <= 0";
03119 
03120         if(svm_type == NU_SVC ||
03121            svm_type == ONE_CLASS ||
03122            svm_type == NU_SVR)
03123                 if(param->nu <= 0 || param->nu > 1)
03124                         return "nu <= 0 or nu > 1";
03125 
03126         if(svm_type == EPSILON_SVR)
03127                 if(param->p < 0)
03128                         return "p < 0";
03129 
03130         if(param->shrinking != 0 &&
03131            param->shrinking != 1)
03132                 return "shrinking != 0 and shrinking != 1";
03133 
03134         if(param->probability != 0 &&
03135            param->probability != 1)
03136                 return "probability != 0 and probability != 1";
03137 
03138         if(param->probability == 1 &&
03139            svm_type == ONE_CLASS)
03140                 return "one-class SVM probability output not supported yet";
03141 
03142 
03143         // check whether nu-svc is feasible
03144         
03145         if(svm_type == NU_SVC)
03146         {
03147                 int l = prob->l;
03148                 int max_nr_class = 16;
03149                 int nr_class = 0;
03150                 int *label = Malloc(int,max_nr_class);
03151                 int *count = Malloc(int,max_nr_class);
03152 
03153                 int i;
03154                 for(i=0;i<l;i++)
03155                 {
03156                         int this_label = (int)prob->y[i];
03157                         int j;
03158                         for(j=0;j<nr_class;j++)
03159                                 if(this_label == label[j])
03160                                 {
03161                                         ++count[j];
03162                                         break;
03163                                 }
03164                         if(j == nr_class)
03165                         {
03166                                 if(nr_class == max_nr_class)
03167                                 {
03168                                         max_nr_class *= 2;
03169                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03170                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03171                                 }
03172                                 label[nr_class] = this_label;
03173                                 count[nr_class] = 1;
03174                                 ++nr_class;
03175                         }
03176                 }
03177         
03178                 for(i=0;i<nr_class;i++)
03179                 {
03180                         int n1 = count[i];
03181                         for(int j=i+1;j<nr_class;j++)
03182                         {
03183                                 int n2 = count[j];
03184                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03185                                 {
03186                                         free(label);
03187                                         free(count);
03188                                         return "specified nu is infeasible";
03189                                 }
03190                         }
03191                 }
03192                 free(label);
03193                 free(count);
03194         }
03195 
03196         return NULL;
03197 }
03198 
03199 int svm_check_probability_model(const svm_model *model)
03200 {
03201         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03202                 model->probA!=NULL && model->probB!=NULL) ||
03203                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03204                  model->probA!=NULL);
03205 }
03206 
03207 void svm_set_print_string_function(void (*print_func)(const char *))
03208 {
03209         if(print_func == NULL)
03210                 svm_print_string = &print_string_stdout;
03211         else
03212                 svm_print_string = print_func;
03213 }
03214 
03215 }; // namespace

Generated on 31 Aug 2015 for Hugintrunk by  doxygen 1.4.7