svm.cpp

Go to the documentation of this file.
00001 /*
00002 Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin
00003 All rights reserved.
00004 
00005 Redistribution and use in source and binary forms, with or without
00006 modification, are permitted provided that the following conditions
00007 are met:
00008 
00009 1. Redistributions of source code must retain the above copyright
00010 notice, this list of conditions and the following disclaimer.
00011 
00012 2. Redistributions in binary form must reproduce the above copyright
00013 notice, this list of conditions and the following disclaimer in the
00014 documentation and/or other materials provided with the distribution.
00015 
00016 3. Neither name of copyright holders nor the names of its contributors
00017 may be used to endorse or promote products derived from this software
00018 without specific prior written permission.
00019 
00020 
00021 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00022 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00023 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00024 A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
00025 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
00026 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
00027 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
00028 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
00029 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
00030 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00031 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00032 
00033 */
00034 
00035 
00036 #include <math.h>
00037 #include <stdio.h>
00038 #include <stdlib.h>
00039 #include <ctype.h>
00040 #include <float.h>
00041 #include <string.h>
00042 #include <stdarg.h>
00043 #include <limits.h>
00044 #include <locale.h>
00045 #include "svm.h"
00046 #include <hugin_config.h>
00047 #ifndef HAVE_LOG1P
00048 #define log1p(x) log(1+x)
00049 #endif
00050 
00051 namespace celeste
00052 {
00053 // changes to original libsvm
00054 // added license to header (Tim Nugent)
00055 // added celeste namespace
00056 // switched to C++ header instead a plain C header
00057 
00058 int libsvm_version = LIBSVM_VERSION;
00059 typedef float Qfloat;
00060 typedef signed char schar;
00061 #ifndef min
00062 template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
00063 #endif
00064 #ifndef max
00065 template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
00066 #endif
00067 template <class T> static inline void swap(T& x, T& y) { T t=x; x=y; y=t; }
00068 template <class S, class T> static inline void clone(T*& dst, S* src, int n)
00069 {
00070         dst = new T[n];
00071         memcpy((void *)dst,(void *)src,sizeof(T)*n);
00072 }
00073 static inline double powi(double base, int times)
00074 {
00075         double tmp = base, ret = 1.0;
00076 
00077         for(int t=times; t>0; t/=2)
00078         {
00079                 if(t%2==1) ret*=tmp;
00080                 tmp = tmp * tmp;
00081         }
00082         return ret;
00083 }
00084 #define INF HUGE_VAL
00085 #define TAU 1e-12
00086 #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
00087 
00088 static void print_string_stdout(const char *s)
00089 {
00090         fputs(s,stdout);
00091         fflush(stdout);
00092 }
00093 static void (*svm_print_string) (const char *) = &print_string_stdout;
00094 #if 1
00095 static void info(const char *fmt,...)
00096 {
00097         char buf[BUFSIZ];
00098         va_list ap;
00099         va_start(ap,fmt);
00100         vsprintf(buf,fmt,ap);
00101         va_end(ap);
00102         (*svm_print_string)(buf);
00103 }
00104 #else
00105 static void info(const char *fmt,...) {}
00106 #endif
00107 
00108 //
00109 // Kernel Cache
00110 //
00111 // l is the number of total data items
00112 // size is the cache size limit in bytes
00113 //
00114 class Cache
00115 {
00116 public:
00117         Cache(int l,long int size);
00118         ~Cache();
00119 
00120         // request data [0,len)
00121         // return some position p where [p,len) need to be filled
00122         // (p >= len if nothing needs to be filled)
00123         int get_data(const int index, Qfloat **data, int len);
00124         void swap_index(int i, int j);
00125 private:
00126         int l;
00127         long int size;
00128         struct head_t
00129         {
00130                 head_t *prev, *next;    // a circular list
00131                 Qfloat *data;
00132                 int len;                // data[0,len) is cached in this entry
00133         };
00134 
00135         head_t *head;
00136         head_t lru_head;
00137         void lru_delete(head_t *h);
00138         void lru_insert(head_t *h);
00139 };
00140 
00141 Cache::Cache(int l_,long int size_):l(l_),size(size_)
00142 {
00143         head = (head_t *)calloc(l,sizeof(head_t));      // initialized to 0
00144         size /= sizeof(Qfloat);
00145         size -= l * sizeof(head_t) / sizeof(Qfloat);
00146         size = max(size, 2 * (long int) l);     // cache must be large enough for two columns
00147         lru_head.next = lru_head.prev = &lru_head;
00148 }
00149 
00150 Cache::~Cache()
00151 {
00152         for(head_t *h = lru_head.next; h != &lru_head; h=h->next)
00153                 free(h->data);
00154         free(head);
00155 }
00156 
00157 void Cache::lru_delete(head_t *h)
00158 {
00159         // delete from current location
00160         h->prev->next = h->next;
00161         h->next->prev = h->prev;
00162 }
00163 
00164 void Cache::lru_insert(head_t *h)
00165 {
00166         // insert to last position
00167         h->next = &lru_head;
00168         h->prev = lru_head.prev;
00169         h->prev->next = h;
00170         h->next->prev = h;
00171 }
00172 
00173 int Cache::get_data(const int index, Qfloat **data, int len)
00174 {
00175         head_t *h = &head[index];
00176         if(h->len) lru_delete(h);
00177         int more = len - h->len;
00178 
00179         if(more > 0)
00180         {
00181                 // free old space
00182                 while(size < more)
00183                 {
00184                         head_t *old = lru_head.next;
00185                         lru_delete(old);
00186                         free(old->data);
00187                         size += old->len;
00188                         old->data = 0;
00189                         old->len = 0;
00190                 }
00191 
00192                 // allocate new space
00193                 h->data = (Qfloat *)realloc(h->data,sizeof(Qfloat)*len);
00194                 size -= more;
00195                 swap(h->len,len);
00196         }
00197 
00198         lru_insert(h);
00199         *data = h->data;
00200         return len;
00201 }
00202 
00203 void Cache::swap_index(int i, int j)
00204 {
00205         if(i==j) return;
00206 
00207         if(head[i].len) lru_delete(&head[i]);
00208         if(head[j].len) lru_delete(&head[j]);
00209         swap(head[i].data,head[j].data);
00210         swap(head[i].len,head[j].len);
00211         if(head[i].len) lru_insert(&head[i]);
00212         if(head[j].len) lru_insert(&head[j]);
00213 
00214         if(i>j) swap(i,j);
00215         for(head_t *h = lru_head.next; h!=&lru_head; h=h->next)
00216         {
00217                 if(h->len > i)
00218                 {
00219                         if(h->len > j)
00220                                 swap(h->data[i],h->data[j]);
00221                         else
00222                         {
00223                                 // give up
00224                                 lru_delete(h);
00225                                 free(h->data);
00226                                 size += h->len;
00227                                 h->data = 0;
00228                                 h->len = 0;
00229                         }
00230                 }
00231         }
00232 }
00233 
00234 //
00235 // Kernel evaluation
00236 //
00237 // the static method k_function is for doing single kernel evaluation
00238 // the constructor of Kernel prepares to calculate the l*l kernel matrix
00239 // the member function get_Q is for getting one column from the Q Matrix
00240 //
00241 class QMatrix {
00242 public:
00243         virtual Qfloat *get_Q(int column, int len) const = 0;
00244         virtual double *get_QD() const = 0;
00245         virtual void swap_index(int i, int j) const = 0;
00246         virtual ~QMatrix() {}
00247 };
00248 
00249 class Kernel: public QMatrix {
00250 public:
00251         Kernel(int l, svm_node * const * x, const svm_parameter& param);
00252         virtual ~Kernel();
00253 
00254         static double k_function(const svm_node *x, const svm_node *y,
00255                                  const svm_parameter& param);
00256         virtual Qfloat *get_Q(int column, int len) const = 0;
00257         virtual double *get_QD() const = 0;
00258         virtual void swap_index(int i, int j) const     // no so const...
00259         {
00260                 swap(x[i],x[j]);
00261                 if(x_square) swap(x_square[i],x_square[j]);
00262         }
00263 protected:
00264 
00265         double (Kernel::*kernel_function)(int i, int j) const;
00266 
00267 private:
00268         const svm_node **x;
00269         double *x_square;
00270 
00271         // svm_parameter
00272         const int kernel_type;
00273         const int degree;
00274         const double gamma;
00275         const double coef0;
00276 
00277         static double dot(const svm_node *px, const svm_node *py);
00278         double kernel_linear(int i, int j) const
00279         {
00280                 return dot(x[i],x[j]);
00281         }
00282         double kernel_poly(int i, int j) const
00283         {
00284                 return powi(gamma*dot(x[i],x[j])+coef0,degree);
00285         }
00286         double kernel_rbf(int i, int j) const
00287         {
00288                 return exp(-gamma*(x_square[i]+x_square[j]-2*dot(x[i],x[j])));
00289         }
00290         double kernel_sigmoid(int i, int j) const
00291         {
00292                 return tanh(gamma*dot(x[i],x[j])+coef0);
00293         }
00294         double kernel_precomputed(int i, int j) const
00295         {
00296                 return x[i][(int)(x[j][0].value)].value;
00297         }
00298 };
00299 
00300 Kernel::Kernel(int l, svm_node * const * x_, const svm_parameter& param)
00301 :kernel_type(param.kernel_type), degree(param.degree),
00302  gamma(param.gamma), coef0(param.coef0)
00303 {
00304         switch(kernel_type)
00305         {
00306                 case LINEAR:
00307                         kernel_function = &Kernel::kernel_linear;
00308                         break;
00309                 case POLY:
00310                         kernel_function = &Kernel::kernel_poly;
00311                         break;
00312                 case RBF:
00313                         kernel_function = &Kernel::kernel_rbf;
00314                         break;
00315                 case SIGMOID:
00316                         kernel_function = &Kernel::kernel_sigmoid;
00317                         break;
00318                 case PRECOMPUTED:
00319                         kernel_function = &Kernel::kernel_precomputed;
00320                         break;
00321         }
00322 
00323         clone(x,x_,l);
00324 
00325         if(kernel_type == RBF)
00326         {
00327                 x_square = new double[l];
00328                 for(int i=0;i<l;i++)
00329                         x_square[i] = dot(x[i],x[i]);
00330         }
00331         else
00332                 x_square = 0;
00333 }
00334 
00335 Kernel::~Kernel()
00336 {
00337         delete[] x;
00338         delete[] x_square;
00339 }
00340 
00341 double Kernel::dot(const svm_node *px, const svm_node *py)
00342 {
00343         double sum = 0;
00344         while(px->index != -1 && py->index != -1)
00345         {
00346                 if(px->index == py->index)
00347                 {
00348                         sum += px->value * py->value;
00349                         ++px;
00350                         ++py;
00351                 }
00352                 else
00353                 {
00354                         if(px->index > py->index)
00355                                 ++py;
00356                         else
00357                                 ++px;
00358                 }                       
00359         }
00360         return sum;
00361 }
00362 
00363 double Kernel::k_function(const svm_node *x, const svm_node *y,
00364                           const svm_parameter& param)
00365 {
00366         switch(param.kernel_type)
00367         {
00368                 case LINEAR:
00369                         return dot(x,y);
00370                 case POLY:
00371                         return powi(param.gamma*dot(x,y)+param.coef0,param.degree);
00372                 case RBF:
00373                 {
00374                         double sum = 0;
00375                         while(x->index != -1 && y->index !=-1)
00376                         {
00377                                 if(x->index == y->index)
00378                                 {
00379                                         double d = x->value - y->value;
00380                                         sum += d*d;
00381                                         ++x;
00382                                         ++y;
00383                                 }
00384                                 else
00385                                 {
00386                                         if(x->index > y->index)
00387                                         {       
00388                                                 sum += y->value * y->value;
00389                                                 ++y;
00390                                         }
00391                                         else
00392                                         {
00393                                                 sum += x->value * x->value;
00394                                                 ++x;
00395                                         }
00396                                 }
00397                         }
00398 
00399                         while(x->index != -1)
00400                         {
00401                                 sum += x->value * x->value;
00402                                 ++x;
00403                         }
00404 
00405                         while(y->index != -1)
00406                         {
00407                                 sum += y->value * y->value;
00408                                 ++y;
00409                         }
00410                         
00411                         return exp(-param.gamma*sum);
00412                 }
00413                 case SIGMOID:
00414                         return tanh(param.gamma*dot(x,y)+param.coef0);
00415                 case PRECOMPUTED:  //x: test (validation), y: SV
00416                         return x[(int)(y->value)].value;
00417                 default:
00418                         return 0;  // Unreachable 
00419         }
00420 }
00421 
00422 // An SMO algorithm in Fan et al., JMLR 6(2005), p. 1889--1918
00423 // Solves:
00424 //
00425 //      min 0.5(\alpha^T Q \alpha) + p^T \alpha
00426 //
00427 //              y^T \alpha = \delta
00428 //              y_i = +1 or -1
00429 //              0 <= alpha_i <= Cp for y_i = 1
00430 //              0 <= alpha_i <= Cn for y_i = -1
00431 //
00432 // Given:
00433 //
00434 //      Q, p, y, Cp, Cn, and an initial feasible point \alpha
00435 //      l is the size of vectors and matrices
00436 //      eps is the stopping tolerance
00437 //
00438 // solution will be put in \alpha, objective value will be put in obj
00439 //
00440 class Solver {
00441 public:
00442         Solver() {};
00443         virtual ~Solver() {};
00444 
00445         struct SolutionInfo {
00446                 double obj;
00447                 double rho;
00448                 double upper_bound_p;
00449                 double upper_bound_n;
00450                 double r;       // for Solver_NU
00451         };
00452 
00453         void Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00454                    double *alpha_, double Cp, double Cn, double eps,
00455                    SolutionInfo* si, int shrinking);
00456 protected:
00457         int active_size;
00458         schar *y;
00459         double *G;              // gradient of objective function
00460         enum { LOWER_BOUND, UPPER_BOUND, FREE };
00461         char *alpha_status;     // LOWER_BOUND, UPPER_BOUND, FREE
00462         double *alpha;
00463         const QMatrix *Q;
00464         const double *QD;
00465         double eps;
00466         double Cp,Cn;
00467         double *p;
00468         int *active_set;
00469         double *G_bar;          // gradient, if we treat free variables as 0
00470         int l;
00471         bool unshrink;  // XXX
00472 
00473         double get_C(int i)
00474         {
00475                 return (y[i] > 0)? Cp : Cn;
00476         }
00477         void update_alpha_status(int i)
00478         {
00479                 if(alpha[i] >= get_C(i))
00480                         alpha_status[i] = UPPER_BOUND;
00481                 else if(alpha[i] <= 0)
00482                         alpha_status[i] = LOWER_BOUND;
00483                 else alpha_status[i] = FREE;
00484         }
00485         bool is_upper_bound(int i) { return alpha_status[i] == UPPER_BOUND; }
00486         bool is_lower_bound(int i) { return alpha_status[i] == LOWER_BOUND; }
00487         bool is_free(int i) { return alpha_status[i] == FREE; }
00488         void swap_index(int i, int j);
00489         void reconstruct_gradient();
00490         virtual int select_working_set(int &i, int &j);
00491         virtual double calculate_rho();
00492         virtual void do_shrinking();
00493 private:
00494         bool be_shrunk(int i, double Gmax1, double Gmax2);
00495 };
00496 
00497 void Solver::swap_index(int i, int j)
00498 {
00499         Q->swap_index(i,j);
00500         swap(y[i],y[j]);
00501         swap(G[i],G[j]);
00502         swap(alpha_status[i],alpha_status[j]);
00503         swap(alpha[i],alpha[j]);
00504         swap(p[i],p[j]);
00505         swap(active_set[i],active_set[j]);
00506         swap(G_bar[i],G_bar[j]);
00507 }
00508 
00509 void Solver::reconstruct_gradient()
00510 {
00511         // reconstruct inactive elements of G from G_bar and free variables
00512 
00513         if(active_size == l) return;
00514 
00515         int i,j;
00516         int nr_free = 0;
00517 
00518         for(j=active_size;j<l;j++)
00519                 G[j] = G_bar[j] + p[j];
00520 
00521         for(j=0;j<active_size;j++)
00522                 if(is_free(j))
00523                         nr_free++;
00524 
00525         if(2*nr_free < active_size)
00526                 info("\nWARNING: using -h 0 may be faster\n");
00527 
00528         if (nr_free*l > 2*active_size*(l-active_size))
00529         {
00530                 for(i=active_size;i<l;i++)
00531                 {
00532                         const Qfloat *Q_i = Q->get_Q(i,active_size);
00533                         for(j=0;j<active_size;j++)
00534                                 if(is_free(j))
00535                                         G[i] += alpha[j] * Q_i[j];
00536                 }
00537         }
00538         else
00539         {
00540                 for(i=0;i<active_size;i++)
00541                         if(is_free(i))
00542                         {
00543                                 const Qfloat *Q_i = Q->get_Q(i,l);
00544                                 double alpha_i = alpha[i];
00545                                 for(j=active_size;j<l;j++)
00546                                         G[j] += alpha_i * Q_i[j];
00547                         }
00548         }
00549 }
00550 
00551 void Solver::Solve(int l, const QMatrix& Q, const double *p_, const schar *y_,
00552                    double *alpha_, double Cp, double Cn, double eps,
00553                    SolutionInfo* si, int shrinking)
00554 {
00555         this->l = l;
00556         this->Q = &Q;
00557         QD=Q.get_QD();
00558         clone(p, p_,l);
00559         clone(y, y_,l);
00560         clone(alpha,alpha_,l);
00561         this->Cp = Cp;
00562         this->Cn = Cn;
00563         this->eps = eps;
00564         unshrink = false;
00565 
00566         // initialize alpha_status
00567         {
00568                 alpha_status = new char[l];
00569                 for(int i=0;i<l;i++)
00570                         update_alpha_status(i);
00571         }
00572 
00573         // initialize active set (for shrinking)
00574         {
00575                 active_set = new int[l];
00576                 for(int i=0;i<l;i++)
00577                         active_set[i] = i;
00578                 active_size = l;
00579         }
00580 
00581         // initialize gradient
00582         {
00583                 G = new double[l];
00584                 G_bar = new double[l];
00585                 int i;
00586                 for(i=0;i<l;i++)
00587                 {
00588                         G[i] = p[i];
00589                         G_bar[i] = 0;
00590                 }
00591                 for(i=0;i<l;i++)
00592                         if(!is_lower_bound(i))
00593                         {
00594                                 const Qfloat *Q_i = Q.get_Q(i,l);
00595                                 double alpha_i = alpha[i];
00596                                 int j;
00597                                 for(j=0;j<l;j++)
00598                                         G[j] += alpha_i*Q_i[j];
00599                                 if(is_upper_bound(i))
00600                                         for(j=0;j<l;j++)
00601                                                 G_bar[j] += get_C(i) * Q_i[j];
00602                         }
00603         }
00604 
00605         // optimization step
00606 
00607         int iter = 0;
00608         int max_iter = max(10000000, l>INT_MAX/100 ? INT_MAX : 100*l);
00609         int counter = min(l,1000)+1;
00610         
00611         while(iter < max_iter)
00612         {
00613                 // show progress and do shrinking
00614 
00615                 if(--counter == 0)
00616                 {
00617                         counter = min(l,1000);
00618                         if(shrinking) do_shrinking();
00619                         info(".");
00620                 }
00621 
00622                 int i,j;
00623                 if(select_working_set(i,j)!=0)
00624                 {
00625                         // reconstruct the whole gradient
00626                         reconstruct_gradient();
00627                         // reset active set size and check
00628                         active_size = l;
00629                         info("*");
00630                         if(select_working_set(i,j)!=0)
00631                                 break;
00632                         else
00633                                 counter = 1;    // do shrinking next iteration
00634                 }
00635                 
00636                 ++iter;
00637 
00638                 // update alpha[i] and alpha[j], handle bounds carefully
00639                 
00640                 const Qfloat *Q_i = Q.get_Q(i,active_size);
00641                 const Qfloat *Q_j = Q.get_Q(j,active_size);
00642 
00643                 double C_i = get_C(i);
00644                 double C_j = get_C(j);
00645 
00646                 double old_alpha_i = alpha[i];
00647                 double old_alpha_j = alpha[j];
00648 
00649                 if(y[i]!=y[j])
00650                 {
00651                         double quad_coef = QD[i]+QD[j]+2*Q_i[j];
00652                         if (quad_coef <= 0)
00653                                 quad_coef = TAU;
00654                         double delta = (-G[i]-G[j])/quad_coef;
00655                         double diff = alpha[i] - alpha[j];
00656                         alpha[i] += delta;
00657                         alpha[j] += delta;
00658                         
00659                         if(diff > 0)
00660                         {
00661                                 if(alpha[j] < 0)
00662                                 {
00663                                         alpha[j] = 0;
00664                                         alpha[i] = diff;
00665                                 }
00666                         }
00667                         else
00668                         {
00669                                 if(alpha[i] < 0)
00670                                 {
00671                                         alpha[i] = 0;
00672                                         alpha[j] = -diff;
00673                                 }
00674                         }
00675                         if(diff > C_i - C_j)
00676                         {
00677                                 if(alpha[i] > C_i)
00678                                 {
00679                                         alpha[i] = C_i;
00680                                         alpha[j] = C_i - diff;
00681                                 }
00682                         }
00683                         else
00684                         {
00685                                 if(alpha[j] > C_j)
00686                                 {
00687                                         alpha[j] = C_j;
00688                                         alpha[i] = C_j + diff;
00689                                 }
00690                         }
00691                 }
00692                 else
00693                 {
00694                         double quad_coef = QD[i]+QD[j]-2*Q_i[j];
00695                         if (quad_coef <= 0)
00696                                 quad_coef = TAU;
00697                         double delta = (G[i]-G[j])/quad_coef;
00698                         double sum = alpha[i] + alpha[j];
00699                         alpha[i] -= delta;
00700                         alpha[j] += delta;
00701 
00702                         if(sum > C_i)
00703                         {
00704                                 if(alpha[i] > C_i)
00705                                 {
00706                                         alpha[i] = C_i;
00707                                         alpha[j] = sum - C_i;
00708                                 }
00709                         }
00710                         else
00711                         {
00712                                 if(alpha[j] < 0)
00713                                 {
00714                                         alpha[j] = 0;
00715                                         alpha[i] = sum;
00716                                 }
00717                         }
00718                         if(sum > C_j)
00719                         {
00720                                 if(alpha[j] > C_j)
00721                                 {
00722                                         alpha[j] = C_j;
00723                                         alpha[i] = sum - C_j;
00724                                 }
00725                         }
00726                         else
00727                         {
00728                                 if(alpha[i] < 0)
00729                                 {
00730                                         alpha[i] = 0;
00731                                         alpha[j] = sum;
00732                                 }
00733                         }
00734                 }
00735 
00736                 // update G
00737 
00738                 double delta_alpha_i = alpha[i] - old_alpha_i;
00739                 double delta_alpha_j = alpha[j] - old_alpha_j;
00740                 
00741                 for(int k=0;k<active_size;k++)
00742                 {
00743                         G[k] += Q_i[k]*delta_alpha_i + Q_j[k]*delta_alpha_j;
00744                 }
00745 
00746                 // update alpha_status and G_bar
00747 
00748                 {
00749                         bool ui = is_upper_bound(i);
00750                         bool uj = is_upper_bound(j);
00751                         update_alpha_status(i);
00752                         update_alpha_status(j);
00753                         int k;
00754                         if(ui != is_upper_bound(i))
00755                         {
00756                                 Q_i = Q.get_Q(i,l);
00757                                 if(ui)
00758                                         for(k=0;k<l;k++)
00759                                                 G_bar[k] -= C_i * Q_i[k];
00760                                 else
00761                                         for(k=0;k<l;k++)
00762                                                 G_bar[k] += C_i * Q_i[k];
00763                         }
00764 
00765                         if(uj != is_upper_bound(j))
00766                         {
00767                                 Q_j = Q.get_Q(j,l);
00768                                 if(uj)
00769                                         for(k=0;k<l;k++)
00770                                                 G_bar[k] -= C_j * Q_j[k];
00771                                 else
00772                                         for(k=0;k<l;k++)
00773                                                 G_bar[k] += C_j * Q_j[k];
00774                         }
00775                 }
00776         }
00777 
00778         if(iter >= max_iter)
00779         {
00780                 if(active_size < l)
00781                 {
00782                         // reconstruct the whole gradient to calculate objective value
00783                         reconstruct_gradient();
00784                         active_size = l;
00785                         info("*");
00786                 }
00787                 fprintf(stderr,"\nWARNING: reaching max number of iterations\n");
00788         }
00789 
00790         // calculate rho
00791 
00792         si->rho = calculate_rho();
00793 
00794         // calculate objective value
00795         {
00796                 double v = 0;
00797                 int i;
00798                 for(i=0;i<l;i++)
00799                         v += alpha[i] * (G[i] + p[i]);
00800 
00801                 si->obj = v/2;
00802         }
00803 
00804         // put back the solution
00805         {
00806                 for(int i=0;i<l;i++)
00807                         alpha_[active_set[i]] = alpha[i];
00808         }
00809 
00810         // juggle everything back
00811         /*{
00812                 for(int i=0;i<l;i++)
00813                         while(active_set[i] != i)
00814                                 swap_index(i,active_set[i]);
00815                                 // or Q.swap_index(i,active_set[i]);
00816         }*/
00817 
00818         si->upper_bound_p = Cp;
00819         si->upper_bound_n = Cn;
00820 
00821         info("\noptimization finished, #iter = %d\n",iter);
00822 
00823         delete[] p;
00824         delete[] y;
00825         delete[] alpha;
00826         delete[] alpha_status;
00827         delete[] active_set;
00828         delete[] G;
00829         delete[] G_bar;
00830 }
00831 
00832 // return 1 if already optimal, return 0 otherwise
00833 int Solver::select_working_set(int &out_i, int &out_j)
00834 {
00835         // return i,j such that
00836         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
00837         // j: minimizes the decrease of obj value
00838         //    (if quadratic coefficeint <= 0, replace it with tau)
00839         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
00840         
00841         double Gmax = -INF;
00842         double Gmax2 = -INF;
00843         int Gmax_idx = -1;
00844         int Gmin_idx = -1;
00845         double obj_diff_min = INF;
00846 
00847         for(int t=0;t<active_size;t++)
00848                 if(y[t]==+1)    
00849                 {
00850                         if(!is_upper_bound(t))
00851                                 if(-G[t] >= Gmax)
00852                                 {
00853                                         Gmax = -G[t];
00854                                         Gmax_idx = t;
00855                                 }
00856                 }
00857                 else
00858                 {
00859                         if(!is_lower_bound(t))
00860                                 if(G[t] >= Gmax)
00861                                 {
00862                                         Gmax = G[t];
00863                                         Gmax_idx = t;
00864                                 }
00865                 }
00866 
00867         int i = Gmax_idx;
00868         const Qfloat *Q_i = NULL;
00869         if(i != -1) // NULL Q_i not accessed: Gmax=-INF if i=-1
00870                 Q_i = Q->get_Q(i,active_size);
00871 
00872         for(int j=0;j<active_size;j++)
00873         {
00874                 if(y[j]==+1)
00875                 {
00876                         if (!is_lower_bound(j))
00877                         {
00878                                 double grad_diff=Gmax+G[j];
00879                                 if (G[j] >= Gmax2)
00880                                         Gmax2 = G[j];
00881                                 if (grad_diff > 0)
00882                                 {
00883                                         double obj_diff;
00884                                         double quad_coef = QD[i]+QD[j]-2.0*y[i]*Q_i[j];
00885                                         if (quad_coef > 0)
00886                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00887                                         else
00888                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00889 
00890                                         if (obj_diff <= obj_diff_min)
00891                                         {
00892                                                 Gmin_idx=j;
00893                                                 obj_diff_min = obj_diff;
00894                                         }
00895                                 }
00896                         }
00897                 }
00898                 else
00899                 {
00900                         if (!is_upper_bound(j))
00901                         {
00902                                 double grad_diff= Gmax-G[j];
00903                                 if (-G[j] >= Gmax2)
00904                                         Gmax2 = -G[j];
00905                                 if (grad_diff > 0)
00906                                 {
00907                                         double obj_diff;
00908                                         double quad_coef = QD[i]+QD[j]+2.0*y[i]*Q_i[j];
00909                                         if (quad_coef > 0)
00910                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
00911                                         else
00912                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
00913 
00914                                         if (obj_diff <= obj_diff_min)
00915                                         {
00916                                                 Gmin_idx=j;
00917                                                 obj_diff_min = obj_diff;
00918                                         }
00919                                 }
00920                         }
00921                 }
00922         }
00923 
00924         if(Gmax+Gmax2 < eps)
00925                 return 1;
00926 
00927         out_i = Gmax_idx;
00928         out_j = Gmin_idx;
00929         return 0;
00930 }
00931 
00932 bool Solver::be_shrunk(int i, double Gmax1, double Gmax2)
00933 {
00934         if(is_upper_bound(i))
00935         {
00936                 if(y[i]==+1)
00937                         return(-G[i] > Gmax1);
00938                 else
00939                         return(-G[i] > Gmax2);
00940         }
00941         else if(is_lower_bound(i))
00942         {
00943                 if(y[i]==+1)
00944                         return(G[i] > Gmax2);
00945                 else    
00946                         return(G[i] > Gmax1);
00947         }
00948         else
00949                 return(false);
00950 }
00951 
00952 void Solver::do_shrinking()
00953 {
00954         int i;
00955         double Gmax1 = -INF;            // max { -y_i * grad(f)_i | i in I_up(\alpha) }
00956         double Gmax2 = -INF;            // max { y_i * grad(f)_i | i in I_low(\alpha) }
00957 
00958         // find maximal violating pair first
00959         for(i=0;i<active_size;i++)
00960         {
00961                 if(y[i]==+1)    
00962                 {
00963                         if(!is_upper_bound(i))  
00964                         {
00965                                 if(-G[i] >= Gmax1)
00966                                         Gmax1 = -G[i];
00967                         }
00968                         if(!is_lower_bound(i))  
00969                         {
00970                                 if(G[i] >= Gmax2)
00971                                         Gmax2 = G[i];
00972                         }
00973                 }
00974                 else    
00975                 {
00976                         if(!is_upper_bound(i))  
00977                         {
00978                                 if(-G[i] >= Gmax2)
00979                                         Gmax2 = -G[i];
00980                         }
00981                         if(!is_lower_bound(i))  
00982                         {
00983                                 if(G[i] >= Gmax1)
00984                                         Gmax1 = G[i];
00985                         }
00986                 }
00987         }
00988 
00989         if(unshrink == false && Gmax1 + Gmax2 <= eps*10) 
00990         {
00991                 unshrink = true;
00992                 reconstruct_gradient();
00993                 active_size = l;
00994                 info("*");
00995         }
00996 
00997         for(i=0;i<active_size;i++)
00998                 if (be_shrunk(i, Gmax1, Gmax2))
00999                 {
01000                         active_size--;
01001                         while (active_size > i)
01002                         {
01003                                 if (!be_shrunk(active_size, Gmax1, Gmax2))
01004                                 {
01005                                         swap_index(i,active_size);
01006                                         break;
01007                                 }
01008                                 active_size--;
01009                         }
01010                 }
01011 }
01012 
01013 double Solver::calculate_rho()
01014 {
01015         double r;
01016         int nr_free = 0;
01017         double ub = INF, lb = -INF, sum_free = 0;
01018         for(int i=0;i<active_size;i++)
01019         {
01020                 double yG = y[i]*G[i];
01021 
01022                 if(is_upper_bound(i))
01023                 {
01024                         if(y[i]==-1)
01025                                 ub = min(ub,yG);
01026                         else
01027                                 lb = max(lb,yG);
01028                 }
01029                 else if(is_lower_bound(i))
01030                 {
01031                         if(y[i]==+1)
01032                                 ub = min(ub,yG);
01033                         else
01034                                 lb = max(lb,yG);
01035                 }
01036                 else
01037                 {
01038                         ++nr_free;
01039                         sum_free += yG;
01040                 }
01041         }
01042 
01043         if(nr_free>0)
01044                 r = sum_free/nr_free;
01045         else
01046                 r = (ub+lb)/2;
01047 
01048         return r;
01049 }
01050 
01051 //
01052 // Solver for nu-svm classification and regression
01053 //
01054 // additional constraint: e^T \alpha = constant
01055 //
01056 class Solver_NU: public Solver
01057 {
01058 public:
01059         Solver_NU() {}
01060         void Solve(int l, const QMatrix& Q, const double *p, const schar *y,
01061                    double *alpha, double Cp, double Cn, double eps,
01062                    SolutionInfo* si, int shrinking)
01063         {
01064                 this->si = si;
01065                 Solver::Solve(l,Q,p,y,alpha,Cp,Cn,eps,si,shrinking);
01066         }
01067 private:
01068         SolutionInfo *si;
01069         int select_working_set(int &i, int &j);
01070         double calculate_rho();
01071         bool be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4);
01072         void do_shrinking();
01073 };
01074 
01075 // return 1 if already optimal, return 0 otherwise
01076 int Solver_NU::select_working_set(int &out_i, int &out_j)
01077 {
01078         // return i,j such that y_i = y_j and
01079         // i: maximizes -y_i * grad(f)_i, i in I_up(\alpha)
01080         // j: minimizes the decrease of obj value
01081         //    (if quadratic coefficeint <= 0, replace it with tau)
01082         //    -y_j*grad(f)_j < -y_i*grad(f)_i, j in I_low(\alpha)
01083 
01084         double Gmaxp = -INF;
01085         double Gmaxp2 = -INF;
01086         int Gmaxp_idx = -1;
01087 
01088         double Gmaxn = -INF;
01089         double Gmaxn2 = -INF;
01090         int Gmaxn_idx = -1;
01091 
01092         int Gmin_idx = -1;
01093         double obj_diff_min = INF;
01094 
01095         for(int t=0;t<active_size;t++)
01096                 if(y[t]==+1)
01097                 {
01098                         if(!is_upper_bound(t))
01099                                 if(-G[t] >= Gmaxp)
01100                                 {
01101                                         Gmaxp = -G[t];
01102                                         Gmaxp_idx = t;
01103                                 }
01104                 }
01105                 else
01106                 {
01107                         if(!is_lower_bound(t))
01108                                 if(G[t] >= Gmaxn)
01109                                 {
01110                                         Gmaxn = G[t];
01111                                         Gmaxn_idx = t;
01112                                 }
01113                 }
01114 
01115         int ip = Gmaxp_idx;
01116         int in = Gmaxn_idx;
01117         const Qfloat *Q_ip = NULL;
01118         const Qfloat *Q_in = NULL;
01119         if(ip != -1) // NULL Q_ip not accessed: Gmaxp=-INF if ip=-1
01120                 Q_ip = Q->get_Q(ip,active_size);
01121         if(in != -1)
01122                 Q_in = Q->get_Q(in,active_size);
01123 
01124         for(int j=0;j<active_size;j++)
01125         {
01126                 if(y[j]==+1)
01127                 {
01128                         if (!is_lower_bound(j)) 
01129                         {
01130                                 double grad_diff=Gmaxp+G[j];
01131                                 if (G[j] >= Gmaxp2)
01132                                         Gmaxp2 = G[j];
01133                                 if (grad_diff > 0)
01134                                 {
01135                                         double obj_diff;
01136                                         double quad_coef = QD[ip]+QD[j]-2*Q_ip[j];
01137                                         if (quad_coef > 0)
01138                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01139                                         else
01140                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01141 
01142                                         if (obj_diff <= obj_diff_min)
01143                                         {
01144                                                 Gmin_idx=j;
01145                                                 obj_diff_min = obj_diff;
01146                                         }
01147                                 }
01148                         }
01149                 }
01150                 else
01151                 {
01152                         if (!is_upper_bound(j))
01153                         {
01154                                 double grad_diff=Gmaxn-G[j];
01155                                 if (-G[j] >= Gmaxn2)
01156                                         Gmaxn2 = -G[j];
01157                                 if (grad_diff > 0)
01158                                 {
01159                                         double obj_diff;
01160                                         double quad_coef = QD[in]+QD[j]-2*Q_in[j];
01161                                         if (quad_coef > 0)
01162                                                 obj_diff = -(grad_diff*grad_diff)/quad_coef;
01163                                         else
01164                                                 obj_diff = -(grad_diff*grad_diff)/TAU;
01165 
01166                                         if (obj_diff <= obj_diff_min)
01167                                         {
01168                                                 Gmin_idx=j;
01169                                                 obj_diff_min = obj_diff;
01170                                         }
01171                                 }
01172                         }
01173                 }
01174         }
01175 
01176         if(max(Gmaxp+Gmaxp2,Gmaxn+Gmaxn2) < eps)
01177                 return 1;
01178 
01179         if (y[Gmin_idx] == +1)
01180                 out_i = Gmaxp_idx;
01181         else
01182                 out_i = Gmaxn_idx;
01183         out_j = Gmin_idx;
01184 
01185         return 0;
01186 }
01187 
01188 bool Solver_NU::be_shrunk(int i, double Gmax1, double Gmax2, double Gmax3, double Gmax4)
01189 {
01190         if(is_upper_bound(i))
01191         {
01192                 if(y[i]==+1)
01193                         return(-G[i] > Gmax1);
01194                 else    
01195                         return(-G[i] > Gmax4);
01196         }
01197         else if(is_lower_bound(i))
01198         {
01199                 if(y[i]==+1)
01200                         return(G[i] > Gmax2);
01201                 else    
01202                         return(G[i] > Gmax3);
01203         }
01204         else
01205                 return(false);
01206 }
01207 
01208 void Solver_NU::do_shrinking()
01209 {
01210         double Gmax1 = -INF;    // max { -y_i * grad(f)_i | y_i = +1, i in I_up(\alpha) }
01211         double Gmax2 = -INF;    // max { y_i * grad(f)_i | y_i = +1, i in I_low(\alpha) }
01212         double Gmax3 = -INF;    // max { -y_i * grad(f)_i | y_i = -1, i in I_up(\alpha) }
01213         double Gmax4 = -INF;    // max { y_i * grad(f)_i | y_i = -1, i in I_low(\alpha) }
01214 
01215         // find maximal violating pair first
01216         int i;
01217         for(i=0;i<active_size;i++)
01218         {
01219                 if(!is_upper_bound(i))
01220                 {
01221                         if(y[i]==+1)
01222                         {
01223                                 if(-G[i] > Gmax1) Gmax1 = -G[i];
01224                         }
01225                         else    if(-G[i] > Gmax4) Gmax4 = -G[i];
01226                 }
01227                 if(!is_lower_bound(i))
01228                 {
01229                         if(y[i]==+1)
01230                         {       
01231                                 if(G[i] > Gmax2) Gmax2 = G[i];
01232                         }
01233                         else    if(G[i] > Gmax3) Gmax3 = G[i];
01234                 }
01235         }
01236 
01237         if(unshrink == false && max(Gmax1+Gmax2,Gmax3+Gmax4) <= eps*10) 
01238         {
01239                 unshrink = true;
01240                 reconstruct_gradient();
01241                 active_size = l;
01242         }
01243 
01244         for(i=0;i<active_size;i++)
01245                 if (be_shrunk(i, Gmax1, Gmax2, Gmax3, Gmax4))
01246                 {
01247                         active_size--;
01248                         while (active_size > i)
01249                         {
01250                                 if (!be_shrunk(active_size, Gmax1, Gmax2, Gmax3, Gmax4))
01251                                 {
01252                                         swap_index(i,active_size);
01253                                         break;
01254                                 }
01255                                 active_size--;
01256                         }
01257                 }
01258 }
01259 
01260 double Solver_NU::calculate_rho()
01261 {
01262         int nr_free1 = 0,nr_free2 = 0;
01263         double ub1 = INF, ub2 = INF;
01264         double lb1 = -INF, lb2 = -INF;
01265         double sum_free1 = 0, sum_free2 = 0;
01266 
01267         for(int i=0;i<active_size;i++)
01268         {
01269                 if(y[i]==+1)
01270                 {
01271                         if(is_upper_bound(i))
01272                                 lb1 = max(lb1,G[i]);
01273                         else if(is_lower_bound(i))
01274                                 ub1 = min(ub1,G[i]);
01275                         else
01276                         {
01277                                 ++nr_free1;
01278                                 sum_free1 += G[i];
01279                         }
01280                 }
01281                 else
01282                 {
01283                         if(is_upper_bound(i))
01284                                 lb2 = max(lb2,G[i]);
01285                         else if(is_lower_bound(i))
01286                                 ub2 = min(ub2,G[i]);
01287                         else
01288                         {
01289                                 ++nr_free2;
01290                                 sum_free2 += G[i];
01291                         }
01292                 }
01293         }
01294 
01295         double r1,r2;
01296         if(nr_free1 > 0)
01297                 r1 = sum_free1/nr_free1;
01298         else
01299                 r1 = (ub1+lb1)/2;
01300         
01301         if(nr_free2 > 0)
01302                 r2 = sum_free2/nr_free2;
01303         else
01304                 r2 = (ub2+lb2)/2;
01305         
01306         si->r = (r1+r2)/2;
01307         return (r1-r2)/2;
01308 }
01309 
01310 //
01311 // Q matrices for various formulations
01312 //
01313 class SVC_Q: public Kernel
01314 { 
01315 public:
01316         SVC_Q(const svm_problem& prob, const svm_parameter& param, const schar *y_)
01317         :Kernel(prob.l, prob.x, param)
01318         {
01319                 clone(y,y_,prob.l);
01320                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01321                 QD = new double[prob.l];
01322                 for(int i=0;i<prob.l;i++)
01323                         QD[i] = (this->*kernel_function)(i,i);
01324         }
01325         
01326         Qfloat *get_Q(int i, int len) const
01327         {
01328                 Qfloat *data;
01329                 int start;
01330                 if((start = cache->get_data(i,&data,len)) < len)
01331                 {
01332                         for(int j=start;j<len;j++)
01333                                 data[j] = (Qfloat)(y[i]*y[j]*(this->*kernel_function)(i,j));
01334                 }
01335                 return data;
01336         }
01337 
01338         double *get_QD() const
01339         {
01340                 return QD;
01341         }
01342 
01343         void swap_index(int i, int j) const
01344         {
01345                 cache->swap_index(i,j);
01346                 Kernel::swap_index(i,j);
01347                 swap(y[i],y[j]);
01348                 swap(QD[i],QD[j]);
01349         }
01350 
01351         ~SVC_Q()
01352         {
01353                 delete[] y;
01354                 delete cache;
01355                 delete[] QD;
01356         }
01357 private:
01358         schar *y;
01359         Cache *cache;
01360         double *QD;
01361 };
01362 
01363 class ONE_CLASS_Q: public Kernel
01364 {
01365 public:
01366         ONE_CLASS_Q(const svm_problem& prob, const svm_parameter& param)
01367         :Kernel(prob.l, prob.x, param)
01368         {
01369                 cache = new Cache(prob.l,(long int)(param.cache_size*(1<<20)));
01370                 QD = new double[prob.l];
01371                 for(int i=0;i<prob.l;i++)
01372                         QD[i] = (this->*kernel_function)(i,i);
01373         }
01374         
01375         Qfloat *get_Q(int i, int len) const
01376         {
01377                 Qfloat *data;
01378                 int start;
01379                 if((start = cache->get_data(i,&data,len)) < len)
01380                 {
01381                         for(int j=start;j<len;j++)
01382                                 data[j] = (Qfloat)(this->*kernel_function)(i,j);
01383                 }
01384                 return data;
01385         }
01386 
01387         double *get_QD() const
01388         {
01389                 return QD;
01390         }
01391 
01392         void swap_index(int i, int j) const
01393         {
01394                 cache->swap_index(i,j);
01395                 Kernel::swap_index(i,j);
01396                 swap(QD[i],QD[j]);
01397         }
01398 
01399         ~ONE_CLASS_Q()
01400         {
01401                 delete cache;
01402                 delete[] QD;
01403         }
01404 private:
01405         Cache *cache;
01406         double *QD;
01407 };
01408 
01409 class SVR_Q: public Kernel
01410 { 
01411 public:
01412         SVR_Q(const svm_problem& prob, const svm_parameter& param)
01413         :Kernel(prob.l, prob.x, param)
01414         {
01415                 l = prob.l;
01416                 cache = new Cache(l,(long int)(param.cache_size*(1<<20)));
01417                 QD = new double[2*l];
01418                 sign = new schar[2*l];
01419                 index = new int[2*l];
01420                 for(int k=0;k<l;k++)
01421                 {
01422                         sign[k] = 1;
01423                         sign[k+l] = -1;
01424                         index[k] = k;
01425                         index[k+l] = k;
01426                         QD[k] = (this->*kernel_function)(k,k);
01427                         QD[k+l] = QD[k];
01428                 }
01429                 buffer[0] = new Qfloat[2*l];
01430                 buffer[1] = new Qfloat[2*l];
01431                 next_buffer = 0;
01432         }
01433 
01434         void swap_index(int i, int j) const
01435         {
01436                 swap(sign[i],sign[j]);
01437                 swap(index[i],index[j]);
01438                 swap(QD[i],QD[j]);
01439         }
01440         
01441         Qfloat *get_Q(int i, int len) const
01442         {
01443                 Qfloat *data;
01444                 int j, real_i = index[i];
01445                 if(cache->get_data(real_i,&data,l) < l)
01446                 {
01447                         for(j=0;j<l;j++)
01448                                 data[j] = (Qfloat)(this->*kernel_function)(real_i,j);
01449                 }
01450 
01451                 // reorder and copy
01452                 Qfloat *buf = buffer[next_buffer];
01453                 next_buffer = 1 - next_buffer;
01454                 schar si = sign[i];
01455                 for(j=0;j<len;j++)
01456                         buf[j] = (Qfloat) si * (Qfloat) sign[j] * data[index[j]];
01457                 return buf;
01458         }
01459 
01460         double *get_QD() const
01461         {
01462                 return QD;
01463         }
01464 
01465         ~SVR_Q()
01466         {
01467                 delete cache;
01468                 delete[] sign;
01469                 delete[] index;
01470                 delete[] buffer[0];
01471                 delete[] buffer[1];
01472                 delete[] QD;
01473         }
01474 private:
01475         int l;
01476         Cache *cache;
01477         schar *sign;
01478         int *index;
01479         mutable int next_buffer;
01480         Qfloat *buffer[2];
01481         double *QD;
01482 };
01483 
01484 //
01485 // construct and solve various formulations
01486 //
01487 static void solve_c_svc(
01488         const svm_problem *prob, const svm_parameter* param,
01489         double *alpha, Solver::SolutionInfo* si, double Cp, double Cn)
01490 {
01491         int l = prob->l;
01492         double *minus_ones = new double[l];
01493         schar *y = new schar[l];
01494 
01495         int i;
01496 
01497         for(i=0;i<l;i++)
01498         {
01499                 alpha[i] = 0;
01500                 minus_ones[i] = -1;
01501                 if(prob->y[i] > 0) y[i] = +1; else y[i] = -1;
01502         }
01503 
01504         Solver s;
01505         s.Solve(l, SVC_Q(*prob,*param,y), minus_ones, y,
01506                 alpha, Cp, Cn, param->eps, si, param->shrinking);
01507 
01508         double sum_alpha=0;
01509         for(i=0;i<l;i++)
01510                 sum_alpha += alpha[i];
01511 
01512         if (Cp==Cn)
01513                 info("nu = %f\n", sum_alpha/(Cp*prob->l));
01514 
01515         for(i=0;i<l;i++)
01516                 alpha[i] *= y[i];
01517 
01518         delete[] minus_ones;
01519         delete[] y;
01520 }
01521 
01522 static void solve_nu_svc(
01523         const svm_problem *prob, const svm_parameter *param,
01524         double *alpha, Solver::SolutionInfo* si)
01525 {
01526         int i;
01527         int l = prob->l;
01528         double nu = param->nu;
01529 
01530         schar *y = new schar[l];
01531 
01532         for(i=0;i<l;i++)
01533                 if(prob->y[i]>0)
01534                         y[i] = +1;
01535                 else
01536                         y[i] = -1;
01537 
01538         double sum_pos = nu*l/2;
01539         double sum_neg = nu*l/2;
01540 
01541         for(i=0;i<l;i++)
01542                 if(y[i] == +1)
01543                 {
01544                         alpha[i] = min(1.0,sum_pos);
01545                         sum_pos -= alpha[i];
01546                 }
01547                 else
01548                 {
01549                         alpha[i] = min(1.0,sum_neg);
01550                         sum_neg -= alpha[i];
01551                 }
01552 
01553         double *zeros = new double[l];
01554 
01555         for(i=0;i<l;i++)
01556                 zeros[i] = 0;
01557 
01558         Solver_NU s;
01559         s.Solve(l, SVC_Q(*prob,*param,y), zeros, y,
01560                 alpha, 1.0, 1.0, param->eps, si,  param->shrinking);
01561         double r = si->r;
01562 
01563         info("C = %f\n",1/r);
01564 
01565         for(i=0;i<l;i++)
01566                 alpha[i] *= y[i]/r;
01567 
01568         si->rho /= r;
01569         si->obj /= (r*r);
01570         si->upper_bound_p = 1/r;
01571         si->upper_bound_n = 1/r;
01572 
01573         delete[] y;
01574         delete[] zeros;
01575 }
01576 
01577 static void solve_one_class(
01578         const svm_problem *prob, const svm_parameter *param,
01579         double *alpha, Solver::SolutionInfo* si)
01580 {
01581         int l = prob->l;
01582         double *zeros = new double[l];
01583         schar *ones = new schar[l];
01584         int i;
01585 
01586         int n = (int)(param->nu*prob->l);       // # of alpha's at upper bound
01587 
01588         for(i=0;i<n;i++)
01589                 alpha[i] = 1;
01590         if(n<prob->l)
01591                 alpha[n] = param->nu * prob->l - n;
01592         for(i=n+1;i<l;i++)
01593                 alpha[i] = 0;
01594 
01595         for(i=0;i<l;i++)
01596         {
01597                 zeros[i] = 0;
01598                 ones[i] = 1;
01599         }
01600 
01601         Solver s;
01602         s.Solve(l, ONE_CLASS_Q(*prob,*param), zeros, ones,
01603                 alpha, 1.0, 1.0, param->eps, si, param->shrinking);
01604 
01605         delete[] zeros;
01606         delete[] ones;
01607 }
01608 
01609 static void solve_epsilon_svr(
01610         const svm_problem *prob, const svm_parameter *param,
01611         double *alpha, Solver::SolutionInfo* si)
01612 {
01613         int l = prob->l;
01614         double *alpha2 = new double[2*l];
01615         double *linear_term = new double[2*l];
01616         schar *y = new schar[2*l];
01617         int i;
01618 
01619         for(i=0;i<l;i++)
01620         {
01621                 alpha2[i] = 0;
01622                 linear_term[i] = param->p - prob->y[i];
01623                 y[i] = 1;
01624 
01625                 alpha2[i+l] = 0;
01626                 linear_term[i+l] = param->p + prob->y[i];
01627                 y[i+l] = -1;
01628         }
01629 
01630         Solver s;
01631         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01632                 alpha2, param->C, param->C, param->eps, si, param->shrinking);
01633 
01634         double sum_alpha = 0;
01635         for(i=0;i<l;i++)
01636         {
01637                 alpha[i] = alpha2[i] - alpha2[i+l];
01638                 sum_alpha += fabs(alpha[i]);
01639         }
01640         info("nu = %f\n",sum_alpha/(param->C*l));
01641 
01642         delete[] alpha2;
01643         delete[] linear_term;
01644         delete[] y;
01645 }
01646 
01647 static void solve_nu_svr(
01648         const svm_problem *prob, const svm_parameter *param,
01649         double *alpha, Solver::SolutionInfo* si)
01650 {
01651         int l = prob->l;
01652         double C = param->C;
01653         double *alpha2 = new double[2*l];
01654         double *linear_term = new double[2*l];
01655         schar *y = new schar[2*l];
01656         int i;
01657 
01658         double sum = C * param->nu * l / 2;
01659         for(i=0;i<l;i++)
01660         {
01661                 alpha2[i] = alpha2[i+l] = min(sum,C);
01662                 sum -= alpha2[i];
01663 
01664                 linear_term[i] = - prob->y[i];
01665                 y[i] = 1;
01666 
01667                 linear_term[i+l] = prob->y[i];
01668                 y[i+l] = -1;
01669         }
01670 
01671         Solver_NU s;
01672         s.Solve(2*l, SVR_Q(*prob,*param), linear_term, y,
01673                 alpha2, C, C, param->eps, si, param->shrinking);
01674 
01675         info("epsilon = %f\n",-si->r);
01676 
01677         for(i=0;i<l;i++)
01678                 alpha[i] = alpha2[i] - alpha2[i+l];
01679 
01680         delete[] alpha2;
01681         delete[] linear_term;
01682         delete[] y;
01683 }
01684 
01685 //
01686 // decision_function
01687 //
01688 struct decision_function
01689 {
01690         double *alpha;
01691         double rho;
01692 };
01693 
01694 static decision_function svm_train_one(
01695         const svm_problem *prob, const svm_parameter *param,
01696         double Cp, double Cn)
01697 {
01698         double *alpha = Malloc(double,prob->l);
01699         Solver::SolutionInfo si;
01700         switch(param->svm_type)
01701         {
01702                 case C_SVC:
01703                         solve_c_svc(prob,param,alpha,&si,Cp,Cn);
01704                         break;
01705                 case NU_SVC:
01706                         solve_nu_svc(prob,param,alpha,&si);
01707                         break;
01708                 case ONE_CLASS:
01709                         solve_one_class(prob,param,alpha,&si);
01710                         break;
01711                 case EPSILON_SVR:
01712                         solve_epsilon_svr(prob,param,alpha,&si);
01713                         break;
01714                 case NU_SVR:
01715                         solve_nu_svr(prob,param,alpha,&si);
01716                         break;
01717         }
01718 
01719         info("obj = %f, rho = %f\n",si.obj,si.rho);
01720 
01721         // output SVs
01722 
01723         int nSV = 0;
01724         int nBSV = 0;
01725         for(int i=0;i<prob->l;i++)
01726         {
01727                 if(fabs(alpha[i]) > 0)
01728                 {
01729                         ++nSV;
01730                         if(prob->y[i] > 0)
01731                         {
01732                                 if(fabs(alpha[i]) >= si.upper_bound_p)
01733                                         ++nBSV;
01734                         }
01735                         else
01736                         {
01737                                 if(fabs(alpha[i]) >= si.upper_bound_n)
01738                                         ++nBSV;
01739                         }
01740                 }
01741         }
01742 
01743         info("nSV = %d, nBSV = %d\n",nSV,nBSV);
01744 
01745         decision_function f;
01746         f.alpha = alpha;
01747         f.rho = si.rho;
01748         return f;
01749 }
01750 
01751 // Platt's binary SVM Probablistic Output: an improvement from Lin et al.
01752 static void sigmoid_train(
01753         int l, const double *dec_values, const double *labels, 
01754         double& A, double& B)
01755 {
01756         double prior1=0, prior0 = 0;
01757         int i;
01758 
01759         for (i=0;i<l;i++)
01760                 if (labels[i] > 0) prior1+=1;
01761                 else prior0+=1;
01762         
01763         int max_iter=100;       // Maximal number of iterations
01764         double min_step=1e-10;  // Minimal step taken in line search
01765         double sigma=1e-12;     // For numerically strict PD of Hessian
01766         double eps=1e-5;
01767         double hiTarget=(prior1+1.0)/(prior1+2.0);
01768         double loTarget=1/(prior0+2.0);
01769         double *t=Malloc(double,l);
01770         double fApB,p,q;
01771         double newA,newB,newf,d1,d2;
01772         int iter;
01773         
01774         // Initial Point and Initial Fun Value
01775         A=0.0; B=log((prior0+1.0)/(prior1+1.0));
01776         double fval = 0.0;
01777 
01778         for (i=0;i<l;i++)
01779         {
01780                 if (labels[i]>0) t[i]=hiTarget;
01781                 else t[i]=loTarget;
01782                 fApB = dec_values[i]*A+B;
01783                 if (fApB>=0)
01784                         fval += t[i]*fApB + log1p(exp(-fApB));
01785                 else
01786                         fval += (t[i] - 1)*fApB +log1p(exp(fApB));
01787         }
01788         for (iter=0;iter<max_iter;iter++)
01789         {
01790                 // Update Gradient and Hessian (use H' = H + sigma I)
01791                 double h11=sigma; // numerically ensures strict PD
01792                 double h22=sigma;
01793                 double h21=0.0;
01794         double g1=0.0;
01795         double g2=0.0;
01796                 for (i=0;i<l;i++)
01797                 {
01798                         fApB = dec_values[i]*A+B;
01799                         if (fApB >= 0)
01800                         {
01801                                 p=exp(-fApB)/(1.0+exp(-fApB));
01802                                 q=1.0/(1.0+exp(-fApB));
01803                         }
01804                         else
01805                         {
01806                                 p=1.0/(1.0+exp(fApB));
01807                                 q=exp(fApB)/(1.0+exp(fApB));
01808                         }
01809                         d2=p*q;
01810                         h11+=dec_values[i]*dec_values[i]*d2;
01811                         h22+=d2;
01812                         h21+=dec_values[i]*d2;
01813                         d1=t[i]-p;
01814                         g1+=dec_values[i]*d1;
01815                         g2+=d1;
01816                 }
01817 
01818                 // Stopping Criteria
01819                 if (fabs(g1)<eps && fabs(g2)<eps)
01820                         break;
01821 
01822                 // Finding Newton direction: -inv(H') * g
01823                 const double det=h11*h22-h21*h21;
01824                 const double dA=-(h22*g1 - h21 * g2) / det;
01825                 const double dB=-(-h21*g1+ h11 * g2) / det;
01826                 const double gd=g1*dA+g2*dB;
01827 
01828 
01829                 double stepsize = 1;            // Line Search
01830                 while (stepsize >= min_step)
01831                 {
01832                         newA = A + stepsize * dA;
01833                         newB = B + stepsize * dB;
01834 
01835                         // New function value
01836                         newf = 0.0;
01837                         for (i=0;i<l;i++)
01838                         {
01839                                 fApB = dec_values[i]*newA+newB;
01840                                 if (fApB >= 0)
01841                                         newf += t[i]*fApB + log1p(exp(-fApB));
01842                                 else
01843                                         newf += (t[i] - 1)*fApB +log1p(exp(fApB));
01844                         }
01845                         // Check sufficient decrease
01846                         if (newf<fval+0.0001*stepsize*gd)
01847                         {
01848                                 A=newA;B=newB;fval=newf;
01849                                 break;
01850                         }
01851                         else
01852                                 stepsize = stepsize / 2.0;
01853                 }
01854 
01855                 if (stepsize < min_step)
01856                 {
01857                         info("Line search fails in two-class probability estimates\n");
01858                         break;
01859                 }
01860         }
01861 
01862         if (iter>=max_iter)
01863                 info("Reaching maximal iterations in two-class probability estimates\n");
01864         free(t);
01865 }
01866 
01867 static double sigmoid_predict(double decision_value, double A, double B)
01868 {
01869         double fApB = decision_value*A+B;
01870         // 1-p used later; avoid catastrophic cancellation
01871         if (fApB >= 0)
01872                 return exp(-fApB)/(1.0+exp(-fApB));
01873         else
01874                 return 1.0/(1+exp(fApB)) ;
01875 }
01876 
01877 // Method 2 from the multiclass_prob paper by Wu, Lin, and Weng
01878 static void multiclass_probability(int k, double **r, double *p)
01879 {
01880         int t,j;
01881         int iter = 0, max_iter=max(100,k);
01882         double **Q=Malloc(double *,k);
01883         double *Qp=Malloc(double,k);
01884         double eps=0.005/k;
01885         
01886         for (t=0;t<k;t++)
01887         {
01888                 p[t]=1.0/k;  // Valid if k = 1
01889                 Q[t]=Malloc(double,k);
01890                 Q[t][t]=0;
01891                 for (j=0;j<t;j++)
01892                 {
01893                         Q[t][t]+=r[j][t]*r[j][t];
01894                         Q[t][j]=Q[j][t];
01895                 }
01896                 for (j=t+1;j<k;j++)
01897                 {
01898                         Q[t][t]+=r[j][t]*r[j][t];
01899                         Q[t][j]=-r[j][t]*r[t][j];
01900                 }
01901         }
01902         for (iter=0;iter<max_iter;iter++)
01903         {
01904                 // stopping condition, recalculate QP,pQP for numerical accuracy
01905                 double pQp=0;
01906                 for (t=0;t<k;t++)
01907                 {
01908                         Qp[t]=0;
01909                         for (j=0;j<k;j++)
01910                                 Qp[t]+=Q[t][j]*p[j];
01911                         pQp+=p[t]*Qp[t];
01912                 }
01913                 double max_error=0;
01914                 for (t=0;t<k;t++)
01915                 {
01916                         double error=fabs(Qp[t]-pQp);
01917                         if (error>max_error)
01918                                 max_error=error;
01919                 }
01920                 if (max_error<eps) break;
01921                 
01922                 for (t=0;t<k;t++)
01923                 {
01924                         double diff=(-Qp[t]+pQp)/Q[t][t];
01925                         p[t]+=diff;
01926                         pQp=(pQp+diff*(diff*Q[t][t]+2*Qp[t]))/(1+diff)/(1+diff);
01927                         for (j=0;j<k;j++)
01928                         {
01929                                 Qp[j]=(Qp[j]+diff*Q[t][j])/(1+diff);
01930                                 p[j]/=(1+diff);
01931                         }
01932                 }
01933         }
01934         if (iter>=max_iter)
01935                 info("Exceeds max_iter in multiclass_prob\n");
01936         for(t=0;t<k;t++) free(Q[t]);
01937         free(Q);
01938         free(Qp);
01939 }
01940 
01941 // Cross-validation decision values for probability estimates
01942 static void svm_binary_svc_probability(
01943         const svm_problem *prob, const svm_parameter *param,
01944         double Cp, double Cn, double& probA, double& probB)
01945 {
01946         int i;
01947         int nr_fold = 5;
01948         int *perm = Malloc(int,prob->l);
01949         double *dec_values = Malloc(double,prob->l);
01950 
01951         // random shuffle
01952         for(i=0;i<prob->l;i++) perm[i]=i;
01953         for(i=0;i<prob->l;i++)
01954         {
01955                 int j = i+rand()%(prob->l-i);
01956                 swap(perm[i],perm[j]);
01957         }
01958         for(i=0;i<nr_fold;i++)
01959         {
01960                 int begin = i*prob->l/nr_fold;
01961                 int end = (i+1)*prob->l/nr_fold;
01962                 int j,k;
01963                 struct svm_problem subprob;
01964 
01965                 subprob.l = prob->l-(end-begin);
01966                 subprob.x = Malloc(struct svm_node*,subprob.l);
01967                 subprob.y = Malloc(double,subprob.l);
01968                         
01969                 k=0;
01970                 for(j=0;j<begin;j++)
01971                 {
01972                         subprob.x[k] = prob->x[perm[j]];
01973                         subprob.y[k] = prob->y[perm[j]];
01974                         ++k;
01975                 }
01976                 for(j=end;j<prob->l;j++)
01977                 {
01978                         subprob.x[k] = prob->x[perm[j]];
01979                         subprob.y[k] = prob->y[perm[j]];
01980                         ++k;
01981                 }
01982                 int p_count=0,n_count=0;
01983                 for(j=0;j<k;j++)
01984                         if(subprob.y[j]>0)
01985                                 p_count++;
01986                         else
01987                                 n_count++;
01988 
01989                 if(p_count==0 && n_count==0)
01990                         for(j=begin;j<end;j++)
01991                                 dec_values[perm[j]] = 0;
01992                 else if(p_count > 0 && n_count == 0)
01993                         for(j=begin;j<end;j++)
01994                                 dec_values[perm[j]] = 1;
01995                 else if(p_count == 0 && n_count > 0)
01996                         for(j=begin;j<end;j++)
01997                                 dec_values[perm[j]] = -1;
01998                 else
01999                 {
02000                         svm_parameter subparam = *param;
02001                         subparam.probability=0;
02002                         subparam.C=1.0;
02003                         subparam.nr_weight=2;
02004                         subparam.weight_label = Malloc(int,2);
02005                         subparam.weight = Malloc(double,2);
02006                         subparam.weight_label[0]=+1;
02007                         subparam.weight_label[1]=-1;
02008                         subparam.weight[0]=Cp;
02009                         subparam.weight[1]=Cn;
02010                         struct svm_model *submodel = svm_train(&subprob,&subparam);
02011                         for(j=begin;j<end;j++)
02012                         {
02013                                 svm_predict_values(submodel,prob->x[perm[j]],&(dec_values[perm[j]]));
02014                                 // ensure +1 -1 order; reason not using CV subroutine
02015                                 dec_values[perm[j]] *= submodel->label[0];
02016                         }               
02017                         svm_free_and_destroy_model(&submodel);
02018                         svm_destroy_param(&subparam);
02019                 }
02020                 free(subprob.x);
02021                 free(subprob.y);
02022         }               
02023         sigmoid_train(prob->l,dec_values,prob->y,probA,probB);
02024         free(dec_values);
02025         free(perm);
02026 }
02027 
02028 // Return parameter of a Laplace distribution 
02029 static double svm_svr_probability(
02030         const svm_problem *prob, const svm_parameter *param)
02031 {
02032         int i;
02033         int nr_fold = 5;
02034         double *ymv = Malloc(double,prob->l);
02035         double mae = 0;
02036 
02037         svm_parameter newparam = *param;
02038         newparam.probability = 0;
02039         svm_cross_validation(prob,&newparam,nr_fold,ymv);
02040         for(i=0;i<prob->l;i++)
02041         {
02042                 ymv[i]=prob->y[i]-ymv[i];
02043                 mae += fabs(ymv[i]);
02044         }               
02045         mae /= prob->l;
02046         double std=sqrt(2*mae*mae);
02047         int count=0;
02048         mae=0;
02049         for(i=0;i<prob->l;i++)
02050                 if (fabs(ymv[i]) > 5*std) 
02051                         count=count+1;
02052                 else 
02053                         mae+=fabs(ymv[i]);
02054         mae /= (prob->l-count);
02055         info("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma= %g\n",mae);
02056         free(ymv);
02057         return mae;
02058 }
02059 
02060 
02061 // label: label name, start: begin of each class, count: #data of classes, perm: indices to the original data
02062 // perm, length l, must be allocated before calling this subroutine
02063 static void svm_group_classes(const svm_problem *prob, int *nr_class_ret, int **label_ret, int **start_ret, int **count_ret, int *perm)
02064 {
02065         int l = prob->l;
02066         int max_nr_class = 16;
02067         int nr_class = 0;
02068         int *label = Malloc(int,max_nr_class);
02069         int *count = Malloc(int,max_nr_class);
02070         int *data_label = Malloc(int,l);
02071         int i;
02072 
02073         for(i=0;i<l;i++)
02074         {
02075                 int this_label = (int)prob->y[i];
02076                 int j;
02077                 for(j=0;j<nr_class;j++)
02078                 {
02079                         if(this_label == label[j])
02080                         {
02081                                 ++count[j];
02082                                 break;
02083                         }
02084                 }
02085                 data_label[i] = j;
02086                 if(j == nr_class)
02087                 {
02088                         if(nr_class == max_nr_class)
02089                         {
02090                                 max_nr_class *= 2;
02091                                 label = (int *)realloc(label,max_nr_class*sizeof(int));
02092                                 count = (int *)realloc(count,max_nr_class*sizeof(int));
02093                         }
02094                         label[nr_class] = this_label;
02095                         count[nr_class] = 1;
02096                         ++nr_class;
02097                 }
02098         }
02099 
02100         //
02101         // Labels are ordered by their first occurrence in the training set. 
02102         // However, for two-class sets with -1/+1 labels and -1 appears first, 
02103         // we swap labels to ensure that internally the binary SVM has positive data corresponding to the +1 instances.
02104         //
02105         if (nr_class == 2 && label[0] == -1 && label[1] == 1)
02106         {
02107                 swap(label[0],label[1]);
02108                 swap(count[0],count[1]);
02109                 for(i=0;i<l;i++)
02110                 {
02111                         if(data_label[i] == 0)
02112                                 data_label[i] = 1;
02113                         else
02114                                 data_label[i] = 0;
02115                 }
02116         }
02117 
02118         int *start = Malloc(int,nr_class);
02119         start[0] = 0;
02120         for(i=1;i<nr_class;i++)
02121                 start[i] = start[i-1]+count[i-1];
02122         for(i=0;i<l;i++)
02123         {
02124                 perm[start[data_label[i]]] = i;
02125                 ++start[data_label[i]];
02126         }
02127         start[0] = 0;
02128         for(i=1;i<nr_class;i++)
02129                 start[i] = start[i-1]+count[i-1];
02130 
02131         *nr_class_ret = nr_class;
02132         *label_ret = label;
02133         *start_ret = start;
02134         *count_ret = count;
02135         free(data_label);
02136 }
02137 
02138 //
02139 // Interface functions
02140 //
02141 svm_model *svm_train(const svm_problem *prob, const svm_parameter *param)
02142 {
02143         svm_model *model = Malloc(svm_model,1);
02144         model->param = *param;
02145         model->free_sv = 0;     // XXX
02146 
02147         if(param->svm_type == ONE_CLASS ||
02148            param->svm_type == EPSILON_SVR ||
02149            param->svm_type == NU_SVR)
02150         {
02151                 // regression or one-class-svm
02152                 model->nr_class = 2;
02153                 model->label = NULL;
02154                 model->nSV = NULL;
02155                 model->probA = NULL; model->probB = NULL;
02156                 model->sv_coef = Malloc(double *,1);
02157 
02158                 if(param->probability && 
02159                    (param->svm_type == EPSILON_SVR ||
02160                     param->svm_type == NU_SVR))
02161                 {
02162                         model->probA = Malloc(double,1);
02163                         model->probA[0] = svm_svr_probability(prob,param);
02164                 }
02165 
02166                 decision_function f = svm_train_one(prob,param,0,0);
02167                 model->rho = Malloc(double,1);
02168                 model->rho[0] = f.rho;
02169 
02170                 int nSV = 0;
02171                 int i;
02172                 for(i=0;i<prob->l;i++)
02173                         if(fabs(f.alpha[i]) > 0) ++nSV;
02174                 model->l = nSV;
02175                 model->SV = Malloc(svm_node *,nSV);
02176                 model->sv_coef[0] = Malloc(double,nSV);
02177                 model->sv_indices = Malloc(int,nSV);
02178                 int j = 0;
02179                 for(i=0;i<prob->l;i++)
02180                         if(fabs(f.alpha[i]) > 0)
02181                         {
02182                                 model->SV[j] = prob->x[i];
02183                                 model->sv_coef[0][j] = f.alpha[i];
02184                                 model->sv_indices[j] = i+1;
02185                                 ++j;
02186                         }               
02187 
02188                 free(f.alpha);
02189         }
02190         else
02191         {
02192                 // classification
02193                 int l = prob->l;
02194                 int nr_class;
02195                 int *label = NULL;
02196                 int *start = NULL;
02197                 int *count = NULL;
02198                 int *perm = Malloc(int,l);
02199 
02200                 // group training data of the same class
02201                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02202                 if(nr_class == 1) 
02203                         info("WARNING: training data in only one class. See README for details.\n");
02204                 
02205                 svm_node **x = Malloc(svm_node *,l);
02206                 int i;
02207                 for(i=0;i<l;i++)
02208                         x[i] = prob->x[perm[i]];
02209 
02210                 // calculate weighted C
02211 
02212                 double *weighted_C = Malloc(double, nr_class);
02213                 for(i=0;i<nr_class;i++)
02214                         weighted_C[i] = param->C;
02215                 for(i=0;i<param->nr_weight;i++)
02216                 {       
02217                         int j;
02218                         for(j=0;j<nr_class;j++)
02219                                 if(param->weight_label[i] == label[j])
02220                                         break;
02221                         if(j == nr_class)
02222                                 fprintf(stderr,"WARNING: class label %d specified in weight is not found\n", param->weight_label[i]);
02223                         else
02224                                 weighted_C[j] *= param->weight[i];
02225                 }
02226 
02227                 // train k*(k-1)/2 models
02228                 
02229                 bool *nonzero = Malloc(bool,l);
02230                 for(i=0;i<l;i++)
02231                         nonzero[i] = false;
02232                 decision_function *f = Malloc(decision_function,nr_class*(nr_class-1)/2);
02233 
02234                 double *probA=NULL,*probB=NULL;
02235                 if (param->probability)
02236                 {
02237                         probA=Malloc(double,nr_class*(nr_class-1)/2);
02238                         probB=Malloc(double,nr_class*(nr_class-1)/2);
02239                 }
02240 
02241                 int p = 0;
02242                 for(i=0;i<nr_class;i++)
02243                         for(int j=i+1;j<nr_class;j++)
02244                         {
02245                                 svm_problem sub_prob;
02246                                 int si = start[i], sj = start[j];
02247                                 int ci = count[i], cj = count[j];
02248                                 sub_prob.l = ci+cj;
02249                                 sub_prob.x = Malloc(svm_node *,sub_prob.l);
02250                                 sub_prob.y = Malloc(double,sub_prob.l);
02251                                 int k;
02252                                 for(k=0;k<ci;k++)
02253                                 {
02254                                         sub_prob.x[k] = x[si+k];
02255                                         sub_prob.y[k] = +1;
02256                                 }
02257                                 for(k=0;k<cj;k++)
02258                                 {
02259                                         sub_prob.x[ci+k] = x[sj+k];
02260                                         sub_prob.y[ci+k] = -1;
02261                                 }
02262 
02263                                 if(param->probability)
02264                                         svm_binary_svc_probability(&sub_prob,param,weighted_C[i],weighted_C[j],probA[p],probB[p]);
02265 
02266                                 f[p] = svm_train_one(&sub_prob,param,weighted_C[i],weighted_C[j]);
02267                                 for(k=0;k<ci;k++)
02268                                         if(!nonzero[si+k] && fabs(f[p].alpha[k]) > 0)
02269                                                 nonzero[si+k] = true;
02270                                 for(k=0;k<cj;k++)
02271                                         if(!nonzero[sj+k] && fabs(f[p].alpha[ci+k]) > 0)
02272                                                 nonzero[sj+k] = true;
02273                                 free(sub_prob.x);
02274                                 free(sub_prob.y);
02275                                 ++p;
02276                         }
02277 
02278                 // build output
02279 
02280                 model->nr_class = nr_class;
02281                 
02282                 model->label = Malloc(int,nr_class);
02283                 for(i=0;i<nr_class;i++)
02284                         model->label[i] = label[i];
02285                 
02286                 model->rho = Malloc(double,nr_class*(nr_class-1)/2);
02287                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02288                         model->rho[i] = f[i].rho;
02289 
02290                 if(param->probability)
02291                 {
02292                         model->probA = Malloc(double,nr_class*(nr_class-1)/2);
02293                         model->probB = Malloc(double,nr_class*(nr_class-1)/2);
02294                         for(i=0;i<nr_class*(nr_class-1)/2;i++)
02295                         {
02296                                 model->probA[i] = probA[i];
02297                                 model->probB[i] = probB[i];
02298                         }
02299                 }
02300                 else
02301                 {
02302                         model->probA=NULL;
02303                         model->probB=NULL;
02304                 }
02305 
02306                 int total_sv = 0;
02307                 int *nz_count = Malloc(int,nr_class);
02308                 model->nSV = Malloc(int,nr_class);
02309                 for(i=0;i<nr_class;i++)
02310                 {
02311                         int nSV = 0;
02312                         for(int j=0;j<count[i];j++)
02313                                 if(nonzero[start[i]+j])
02314                                 {       
02315                                         ++nSV;
02316                                         ++total_sv;
02317                                 }
02318                         model->nSV[i] = nSV;
02319                         nz_count[i] = nSV;
02320                 }
02321                 
02322                 info("Total nSV = %d\n",total_sv);
02323 
02324                 model->l = total_sv;
02325                 model->SV = Malloc(svm_node *,total_sv);
02326                 model->sv_indices = Malloc(int,total_sv);
02327                 p = 0;
02328                 for(i=0;i<l;i++)
02329                         if(nonzero[i])
02330                         {
02331                                 model->SV[p] = x[i];
02332                                 model->sv_indices[p++] = perm[i] + 1;
02333                         }
02334 
02335                 int *nz_start = Malloc(int,nr_class);
02336                 nz_start[0] = 0;
02337                 for(i=1;i<nr_class;i++)
02338                         nz_start[i] = nz_start[i-1]+nz_count[i-1];
02339 
02340                 model->sv_coef = Malloc(double *,nr_class-1);
02341                 for(i=0;i<nr_class-1;i++)
02342                         model->sv_coef[i] = Malloc(double,total_sv);
02343 
02344                 p = 0;
02345                 for(i=0;i<nr_class;i++)
02346                         for(int j=i+1;j<nr_class;j++)
02347                         {
02348                                 // classifier (i,j): coefficients with
02349                                 // i are in sv_coef[j-1][nz_start[i]...],
02350                                 // j are in sv_coef[i][nz_start[j]...]
02351 
02352                                 int si = start[i];
02353                                 int sj = start[j];
02354                                 int ci = count[i];
02355                                 int cj = count[j];
02356                                 
02357                                 int q = nz_start[i];
02358                                 int k;
02359                                 for(k=0;k<ci;k++)
02360                                         if(nonzero[si+k])
02361                                                 model->sv_coef[j-1][q++] = f[p].alpha[k];
02362                                 q = nz_start[j];
02363                                 for(k=0;k<cj;k++)
02364                                         if(nonzero[sj+k])
02365                                                 model->sv_coef[i][q++] = f[p].alpha[ci+k];
02366                                 ++p;
02367                         }
02368                 
02369                 free(label);
02370                 free(probA);
02371                 free(probB);
02372                 free(count);
02373                 free(perm);
02374                 free(start);
02375                 free(x);
02376                 free(weighted_C);
02377                 free(nonzero);
02378                 for(i=0;i<nr_class*(nr_class-1)/2;i++)
02379                         free(f[i].alpha);
02380                 free(f);
02381                 free(nz_count);
02382                 free(nz_start);
02383         }
02384         return model;
02385 }
02386 
02387 // Stratified cross validation
02388 void svm_cross_validation(const svm_problem *prob, const svm_parameter *param, int nr_fold, double *target)
02389 {
02390         int i;
02391         int *fold_start;
02392         int l = prob->l;
02393         int *perm = Malloc(int,l);
02394         int nr_class;
02395         if (nr_fold > l)
02396         {
02397                 nr_fold = l;
02398                 fprintf(stderr,"WARNING: # folds > # data. Will use # folds = # data instead (i.e., leave-one-out cross validation)\n");
02399         }
02400         fold_start = Malloc(int,nr_fold+1);
02401         // stratified cv may not give leave-one-out rate
02402         // Each class to l folds -> some folds may have zero elements
02403         if((param->svm_type == C_SVC ||
02404             param->svm_type == NU_SVC) && nr_fold < l)
02405         {
02406                 int *start = NULL;
02407                 int *label = NULL;
02408                 int *count = NULL;
02409                 svm_group_classes(prob,&nr_class,&label,&start,&count,perm);
02410 
02411                 // random shuffle and then data grouped by fold using the array perm
02412                 int *fold_count = Malloc(int,nr_fold);
02413                 int c;
02414                 int *index = Malloc(int,l);
02415                 for(i=0;i<l;i++)
02416                         index[i]=perm[i];
02417                 for (c=0; c<nr_class; c++) 
02418                         for(i=0;i<count[c];i++)
02419                         {
02420                                 int j = i+rand()%(count[c]-i);
02421                                 swap(index[start[c]+j],index[start[c]+i]);
02422                         }
02423                 for(i=0;i<nr_fold;i++)
02424                 {
02425                         fold_count[i] = 0;
02426                         for (c=0; c<nr_class;c++)
02427                                 fold_count[i]+=(i+1)*count[c]/nr_fold-i*count[c]/nr_fold;
02428                 }
02429                 fold_start[0]=0;
02430                 for (i=1;i<=nr_fold;i++)
02431                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02432                 for (c=0; c<nr_class;c++)
02433                         for(i=0;i<nr_fold;i++)
02434                         {
02435                                 int begin = start[c]+i*count[c]/nr_fold;
02436                                 int end = start[c]+(i+1)*count[c]/nr_fold;
02437                                 for(int j=begin;j<end;j++)
02438                                 {
02439                                         perm[fold_start[i]] = index[j];
02440                                         fold_start[i]++;
02441                                 }
02442                         }
02443                 fold_start[0]=0;
02444                 for (i=1;i<=nr_fold;i++)
02445                         fold_start[i] = fold_start[i-1]+fold_count[i-1];
02446                 free(start);
02447                 free(label);
02448                 free(count);
02449                 free(index);
02450                 free(fold_count);
02451         }
02452         else
02453         {
02454                 for(i=0;i<l;i++) perm[i]=i;
02455                 for(i=0;i<l;i++)
02456                 {
02457                         int j = i+rand()%(l-i);
02458                         swap(perm[i],perm[j]);
02459                 }
02460                 for(i=0;i<=nr_fold;i++)
02461                         fold_start[i]=i*l/nr_fold;
02462         }
02463 
02464         for(i=0;i<nr_fold;i++)
02465         {
02466                 int begin = fold_start[i];
02467                 int end = fold_start[i+1];
02468                 int j,k;
02469                 struct svm_problem subprob;
02470 
02471                 subprob.l = l-(end-begin);
02472                 subprob.x = Malloc(struct svm_node*,subprob.l);
02473                 subprob.y = Malloc(double,subprob.l);
02474                         
02475                 k=0;
02476                 for(j=0;j<begin;j++)
02477                 {
02478                         subprob.x[k] = prob->x[perm[j]];
02479                         subprob.y[k] = prob->y[perm[j]];
02480                         ++k;
02481                 }
02482                 for(j=end;j<l;j++)
02483                 {
02484                         subprob.x[k] = prob->x[perm[j]];
02485                         subprob.y[k] = prob->y[perm[j]];
02486                         ++k;
02487                 }
02488                 struct svm_model *submodel = svm_train(&subprob,param);
02489                 if(param->probability && 
02490                    (param->svm_type == C_SVC || param->svm_type == NU_SVC))
02491                 {
02492                         double *prob_estimates=Malloc(double,svm_get_nr_class(submodel));
02493                         for(j=begin;j<end;j++)
02494                                 target[perm[j]] = svm_predict_probability(submodel,prob->x[perm[j]],prob_estimates);
02495                         free(prob_estimates);
02496                 }
02497                 else
02498                         for(j=begin;j<end;j++)
02499                                 target[perm[j]] = svm_predict(submodel,prob->x[perm[j]]);
02500                 svm_free_and_destroy_model(&submodel);
02501                 free(subprob.x);
02502                 free(subprob.y);
02503         }               
02504         free(fold_start);
02505         free(perm);
02506 }
02507 
02508 
02509 int svm_get_svm_type(const svm_model *model)
02510 {
02511         return model->param.svm_type;
02512 }
02513 
02514 int svm_get_nr_class(const svm_model *model)
02515 {
02516         return model->nr_class;
02517 }
02518 
02519 void svm_get_labels(const svm_model *model, int* label)
02520 {
02521         if (model->label != NULL)
02522                 for(int i=0;i<model->nr_class;i++)
02523                         label[i] = model->label[i];
02524 }
02525 
02526 void svm_get_sv_indices(const svm_model *model, int* indices)
02527 {
02528         if (model->sv_indices != NULL)
02529                 for(int i=0;i<model->l;i++)
02530                         indices[i] = model->sv_indices[i];
02531 }
02532 
02533 int svm_get_nr_sv(const svm_model *model)
02534 {
02535         return model->l;
02536 }
02537 
02538 double svm_get_svr_probability(const svm_model *model)
02539 {
02540         if ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
02541             model->probA!=NULL)
02542                 return model->probA[0];
02543         else
02544         {
02545                 fprintf(stderr,"Model doesn't contain information for SVR probability inference\n");
02546                 return 0;
02547         }
02548 }
02549 
02550 double svm_predict_values(const svm_model *model, const svm_node *x, double* dec_values)
02551 {
02552         int i;
02553         if(model->param.svm_type == ONE_CLASS ||
02554            model->param.svm_type == EPSILON_SVR ||
02555            model->param.svm_type == NU_SVR)
02556         {
02557                 double *sv_coef = model->sv_coef[0];
02558                 double sum = 0;
02559                 for(i=0;i<model->l;i++)
02560                         sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
02561                 sum -= model->rho[0];
02562                 *dec_values = sum;
02563 
02564                 if(model->param.svm_type == ONE_CLASS)
02565                         return (sum>0)?1:-1;
02566                 else
02567                         return sum;
02568         }
02569         else
02570         {
02571                 int nr_class = model->nr_class;
02572                 int l = model->l;
02573                 
02574                 double *kvalue = Malloc(double,l);
02575                 for(i=0;i<l;i++)
02576                         kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);
02577 
02578                 int *start = Malloc(int,nr_class);
02579                 start[0] = 0;
02580                 for(i=1;i<nr_class;i++)
02581                         start[i] = start[i-1]+model->nSV[i-1];
02582 
02583                 int *vote = Malloc(int,nr_class);
02584                 for(i=0;i<nr_class;i++)
02585                         vote[i] = 0;
02586 
02587                 int p=0;
02588                 for(i=0;i<nr_class;i++)
02589                         for(int j=i+1;j<nr_class;j++)
02590                         {
02591                                 double sum = 0;
02592                                 int si = start[i];
02593                                 int sj = start[j];
02594                                 int ci = model->nSV[i];
02595                                 int cj = model->nSV[j];
02596                                 
02597                                 int k;
02598                                 double *coef1 = model->sv_coef[j-1];
02599                                 double *coef2 = model->sv_coef[i];
02600                                 for(k=0;k<ci;k++)
02601                                         sum += coef1[si+k] * kvalue[si+k];
02602                                 for(k=0;k<cj;k++)
02603                                         sum += coef2[sj+k] * kvalue[sj+k];
02604                                 sum -= model->rho[p];
02605                                 dec_values[p] = sum;
02606 
02607                                 if(dec_values[p] > 0)
02608                                         ++vote[i];
02609                                 else
02610                                         ++vote[j];
02611                                 p++;
02612                         }
02613 
02614                 int vote_max_idx = 0;
02615                 for(i=1;i<nr_class;i++)
02616                         if(vote[i] > vote[vote_max_idx])
02617                                 vote_max_idx = i;
02618 
02619                 free(kvalue);
02620                 free(start);
02621                 free(vote);
02622                 return model->label[vote_max_idx];
02623         }
02624 }
02625 
02626 double svm_predict(const svm_model *model, const svm_node *x)
02627 {
02628         int nr_class = model->nr_class;
02629         double *dec_values;
02630         if(model->param.svm_type == ONE_CLASS ||
02631            model->param.svm_type == EPSILON_SVR ||
02632            model->param.svm_type == NU_SVR)
02633                 dec_values = Malloc(double, 1);
02634         else 
02635                 dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02636         double pred_result = svm_predict_values(model, x, dec_values);
02637         free(dec_values);
02638         return pred_result;
02639 }
02640 
02641 double svm_predict_probability(
02642         const svm_model *model, const svm_node *x, double *prob_estimates)
02643 {
02644         if ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
02645             model->probA!=NULL && model->probB!=NULL)
02646         {
02647                 int i;
02648                 int nr_class = model->nr_class;
02649                 double *dec_values = Malloc(double, nr_class*(nr_class-1)/2);
02650                 svm_predict_values(model, x, dec_values);
02651 
02652                 double min_prob=1e-7;
02653                 double **pairwise_prob=Malloc(double *,nr_class);
02654                 for(i=0;i<nr_class;i++)
02655                         pairwise_prob[i]=Malloc(double,nr_class);
02656                 int k=0;
02657                 for(i=0;i<nr_class;i++)
02658                         for(int j=i+1;j<nr_class;j++)
02659                         {
02660                                 pairwise_prob[i][j]=min(max(sigmoid_predict(dec_values[k],model->probA[k],model->probB[k]),min_prob),1-min_prob);
02661                                 pairwise_prob[j][i]=1-pairwise_prob[i][j];
02662                                 k++;
02663                         }
02664                 multiclass_probability(nr_class,pairwise_prob,prob_estimates);
02665 
02666                 int prob_max_idx = 0;
02667                 for(i=1;i<nr_class;i++)
02668                         if(prob_estimates[i] > prob_estimates[prob_max_idx])
02669                                 prob_max_idx = i;
02670                 for(i=0;i<nr_class;i++)
02671                         free(pairwise_prob[i]);
02672                 free(dec_values);
02673                 free(pairwise_prob);
02674                 return model->label[prob_max_idx];
02675         }
02676         else 
02677                 return svm_predict(model, x);
02678 }
02679 
02680 static const char *svm_type_table[] =
02681 {
02682         "c_svc","nu_svc","one_class","epsilon_svr","nu_svr",NULL
02683 };
02684 
02685 static const char *kernel_type_table[]=
02686 {
02687         "linear","polynomial","rbf","sigmoid","precomputed",NULL
02688 };
02689 
02690 int svm_save_model(const char *model_file_name, const svm_model *model)
02691 {
02692         FILE *fp = fopen(model_file_name,"w");
02693         if(fp==NULL) return -1;
02694 
02695         char *old_locale = strdup(setlocale(LC_ALL, NULL));
02696         setlocale(LC_ALL, "C");
02697 
02698         const svm_parameter& param = model->param;
02699 
02700         fprintf(fp,"svm_type %s\n", svm_type_table[param.svm_type]);
02701         fprintf(fp,"kernel_type %s\n", kernel_type_table[param.kernel_type]);
02702 
02703         if(param.kernel_type == POLY)
02704                 fprintf(fp,"degree %d\n", param.degree);
02705 
02706         if(param.kernel_type == POLY || param.kernel_type == RBF || param.kernel_type == SIGMOID)
02707                 fprintf(fp,"gamma %g\n", param.gamma);
02708 
02709         if(param.kernel_type == POLY || param.kernel_type == SIGMOID)
02710                 fprintf(fp,"coef0 %g\n", param.coef0);
02711 
02712         int nr_class = model->nr_class;
02713         int l = model->l;
02714         fprintf(fp, "nr_class %d\n", nr_class);
02715         fprintf(fp, "total_sv %d\n",l);
02716         
02717         {
02718                 fprintf(fp, "rho");
02719                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02720                         fprintf(fp," %g",model->rho[i]);
02721                 fprintf(fp, "\n");
02722         }
02723         
02724         if(model->label)
02725         {
02726                 fprintf(fp, "label");
02727                 for(int i=0;i<nr_class;i++)
02728                         fprintf(fp," %d",model->label[i]);
02729                 fprintf(fp, "\n");
02730         }
02731 
02732         if(model->probA) // regression has probA only
02733         {
02734                 fprintf(fp, "probA");
02735                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02736                         fprintf(fp," %g",model->probA[i]);
02737                 fprintf(fp, "\n");
02738         }
02739         if(model->probB)
02740         {
02741                 fprintf(fp, "probB");
02742                 for(int i=0;i<nr_class*(nr_class-1)/2;i++)
02743                         fprintf(fp," %g",model->probB[i]);
02744                 fprintf(fp, "\n");
02745         }
02746 
02747         if(model->nSV)
02748         {
02749                 fprintf(fp, "nr_sv");
02750                 for(int i=0;i<nr_class;i++)
02751                         fprintf(fp," %d",model->nSV[i]);
02752                 fprintf(fp, "\n");
02753         }
02754 
02755         fprintf(fp, "SV\n");
02756         const double * const *sv_coef = model->sv_coef;
02757         const svm_node * const *SV = model->SV;
02758 
02759         for(int i=0;i<l;i++)
02760         {
02761                 for(int j=0;j<nr_class-1;j++)
02762                         fprintf(fp, "%.16g ",sv_coef[j][i]);
02763 
02764                 const svm_node *p = SV[i];
02765 
02766                 if(param.kernel_type == PRECOMPUTED)
02767                         fprintf(fp,"0:%d ",(int)(p->value));
02768                 else
02769                         while(p->index != -1)
02770                         {
02771                                 fprintf(fp,"%d:%.8g ",p->index,p->value);
02772                                 p++;
02773                         }
02774                 fprintf(fp, "\n");
02775         }
02776 
02777         setlocale(LC_ALL, old_locale);
02778         free(old_locale);
02779 
02780         if (ferror(fp) != 0 || fclose(fp) != 0) return -1;
02781         else return 0;
02782 }
02783 
02784 static char *line = NULL;
02785 static int max_line_len;
02786 
02787 static char* readline(FILE *input)
02788 {
02789         if(fgets(line,max_line_len,input) == NULL)
02790                 return NULL;
02791 
02792         while(strrchr(line,'\n') == NULL)
02793         {
02794                 max_line_len *= 2;
02795                 line = (char *) realloc(line,max_line_len);
02796                 int len = (int) strlen(line);
02797                 if(fgets(line+len,max_line_len-len,input) == NULL)
02798                         break;
02799         }
02800         return line;
02801 }
02802 
02803 //
02804 // FSCANF helps to handle fscanf failures.
02805 // Its do-while block avoids the ambiguity when
02806 // if (...)
02807 //    FSCANF();
02808 // is used
02809 //
02810 #define FSCANF(_stream, _format, _var) do{ if (fscanf(_stream, _format, _var) != 1) return false; }while(0)
02811 bool read_model_header(FILE *fp, svm_model* model)
02812 {
02813         svm_parameter& param = model->param;
02814         char cmd[101];
02815         while(1)
02816         {
02817                 FSCANF(fp,"%100s",cmd);
02818 
02819                 if(strcmp(cmd,"svm_type")==0)
02820                 {
02821                         FSCANF(fp,"%100s",cmd);
02822                         int i;
02823                         for(i=0;svm_type_table[i];i++)
02824                         {
02825                                 if(strcmp(svm_type_table[i],cmd)==0)
02826                                 {
02827                                         param.svm_type=i;
02828                                         break;
02829                                 }
02830                         }
02831                         if(svm_type_table[i] == NULL)
02832                         {
02833                                 fprintf(stderr,"unknown svm type.\n");
02834                                 return false;
02835                         }
02836                 }
02837                 else if(strcmp(cmd,"kernel_type")==0)
02838                 {               
02839                         FSCANF(fp,"%100s",cmd);
02840                         int i;
02841                         for(i=0;kernel_type_table[i];i++)
02842                         {
02843                                 if(strcmp(kernel_type_table[i],cmd)==0)
02844                                 {
02845                                         param.kernel_type=i;
02846                                         break;
02847                                 }
02848                         }
02849                         if(kernel_type_table[i] == NULL)
02850                         {
02851                                 fprintf(stderr,"unknown kernel function.\n");   
02852                                 return false;
02853                         }
02854                 }
02855                 else if(strcmp(cmd,"degree")==0)
02856                         FSCANF(fp,"%d",&param.degree);
02857                 else if(strcmp(cmd,"gamma")==0)
02858                         FSCANF(fp,"%lf",&param.gamma);
02859                 else if(strcmp(cmd,"coef0")==0)
02860                         FSCANF(fp,"%lf",&param.coef0);
02861                 else if(strcmp(cmd,"nr_class")==0)
02862                         FSCANF(fp,"%d",&model->nr_class);
02863                 else if(strcmp(cmd,"total_sv")==0)
02864                         FSCANF(fp,"%d",&model->l);
02865                 else if(strcmp(cmd,"rho")==0)
02866                 {
02867                         int n = model->nr_class * (model->nr_class-1)/2;
02868                         model->rho = Malloc(double,n);
02869                         for(int i=0;i<n;i++)
02870                                 FSCANF(fp,"%lf",&model->rho[i]);
02871                 }
02872                 else if(strcmp(cmd,"label")==0)
02873                 {
02874                         int n = model->nr_class;
02875                         model->label = Malloc(int,n);
02876                         for(int i=0;i<n;i++)
02877                                 FSCANF(fp,"%d",&model->label[i]);
02878                 }
02879                 else if(strcmp(cmd,"probA")==0)
02880                 {
02881                         int n = model->nr_class * (model->nr_class-1)/2;
02882                         model->probA = Malloc(double,n);
02883                         for(int i=0;i<n;i++)
02884                                 FSCANF(fp,"%lf",&model->probA[i]);
02885                 }
02886                 else if(strcmp(cmd,"probB")==0)
02887                 {
02888                         int n = model->nr_class * (model->nr_class-1)/2;
02889                         model->probB = Malloc(double,n);
02890                         for(int i=0;i<n;i++)
02891                                 FSCANF(fp,"%lf",&model->probB[i]);
02892                 }
02893                 else if(strcmp(cmd,"nr_sv")==0)
02894                 {
02895                         int n = model->nr_class;
02896                         model->nSV = Malloc(int,n);
02897                         for(int i=0;i<n;i++)
02898                                 FSCANF(fp,"%d",&model->nSV[i]);
02899                 }
02900                 else if(strcmp(cmd,"license")==0)
02901                 {
02902                         FSCANF(fp,"%100s",cmd);
02903                         //std::cout << "License: " << cmd << std::endl;
02904                 
02905                 }
02906                 else if(strcmp(cmd,"SV")==0)
02907                 {
02908                         while(1)
02909                         {
02910                                 int c = getc(fp);
02911                                 if(c==EOF || c=='\n') break;
02912                         }
02913                         break;
02914                 }
02915                 else
02916                 {
02917                         fprintf(stderr,"unknown text in model file: [%s]\n",cmd);
02918                         return false;
02919                 }
02920         }
02921 
02922         return true;
02923 
02924 }
02925 
02926 svm_model *svm_load_model(const char *model_file_name)
02927 {
02928         FILE *fp = fopen(model_file_name,"rb");
02929         if(fp==NULL) return NULL;
02930 
02931         char *old_locale = strdup(setlocale(LC_ALL, NULL));
02932         setlocale(LC_ALL, "C");
02933 
02934         // read parameters
02935 
02936         svm_model *model = Malloc(svm_model,1);
02937         model->rho = NULL;
02938         model->probA = NULL;
02939         model->probB = NULL;
02940         model->sv_indices = NULL;
02941         model->label = NULL;
02942         model->nSV = NULL;
02943         
02944         // read header
02945         if (!read_model_header(fp, model))
02946         {
02947                 fprintf(stderr, "ERROR: fscanf failed to read model\n");
02948                 setlocale(LC_ALL, old_locale);
02949                 free(old_locale);
02950                 free(model->rho);
02951                 free(model->label);
02952                 free(model->nSV);
02953                 free(model);
02954                 return NULL;
02955         }
02956         
02957         // read sv_coef and SV
02958 
02959         int elements = 0;
02960         long pos = ftell(fp);
02961 
02962         max_line_len = 1024;
02963         line = Malloc(char,max_line_len);
02964         char *p,*endptr,*idx,*val;
02965 
02966         while(readline(fp)!=NULL)
02967         {
02968                 p = strtok(line,":");
02969                 while(1)
02970                 {
02971                         p = strtok(NULL,":");
02972                         if(p == NULL)
02973                                 break;
02974                         ++elements;
02975                 }
02976         }
02977         elements += model->l;
02978 
02979         fseek(fp,pos,SEEK_SET);
02980 
02981         int m = model->nr_class - 1;
02982         int l = model->l;
02983         model->sv_coef = Malloc(double *,m);
02984         int i;
02985         for(i=0;i<m;i++)
02986                 model->sv_coef[i] = Malloc(double,l);
02987         model->SV = Malloc(svm_node*,l);
02988         svm_node *x_space = NULL;
02989         if(l>0) x_space = Malloc(svm_node,elements);
02990 
02991         int j=0;
02992         for(i=0;i<l;i++)
02993         {
02994                 readline(fp);
02995                 model->SV[i] = &x_space[j];
02996 
02997                 p = strtok(line, " \t");
02998                 model->sv_coef[0][i] = strtod(p,&endptr);
02999                 for(int k=1;k<m;k++)
03000                 {
03001                         p = strtok(NULL, " \t");
03002                         model->sv_coef[k][i] = strtod(p,&endptr);
03003                 }
03004 
03005                 while(1)
03006                 {
03007                         idx = strtok(NULL, ":");
03008                         val = strtok(NULL, " \t");
03009 
03010                         if(val == NULL)
03011                                 break;
03012                         x_space[j].index = (int) strtol(idx,&endptr,10);
03013                         x_space[j].value = strtod(val,&endptr);
03014 
03015                         ++j;
03016                 }
03017                 x_space[j++].index = -1;
03018         }
03019         free(line);
03020 
03021         setlocale(LC_ALL, old_locale);
03022         free(old_locale);
03023 
03024         if (ferror(fp) != 0 || fclose(fp) != 0)
03025                 return NULL;
03026 
03027         model->free_sv = 1;     // XXX
03028         return model;
03029 }
03030 
03031 void svm_free_model_content(svm_model* model_ptr)
03032 {
03033         if(model_ptr->free_sv && model_ptr->l > 0 && model_ptr->SV != NULL)
03034                 free((void *)(model_ptr->SV[0]));
03035         if(model_ptr->sv_coef)
03036         {
03037                 for(int i=0;i<model_ptr->nr_class-1;i++)
03038                         free(model_ptr->sv_coef[i]);
03039         }
03040 
03041         free(model_ptr->SV);
03042         model_ptr->SV = NULL;
03043 
03044         free(model_ptr->sv_coef);
03045         model_ptr->sv_coef = NULL;
03046 
03047         free(model_ptr->rho);
03048         model_ptr->rho = NULL;
03049 
03050         free(model_ptr->label);
03051         model_ptr->label= NULL;
03052 
03053         free(model_ptr->probA);
03054         model_ptr->probA = NULL;
03055 
03056         free(model_ptr->probB);
03057         model_ptr->probB= NULL;
03058 
03059         free(model_ptr->sv_indices);
03060         model_ptr->sv_indices = NULL;
03061 
03062         free(model_ptr->nSV);
03063         model_ptr->nSV = NULL;
03064 }
03065 
03066 void svm_free_and_destroy_model(svm_model** model_ptr_ptr)
03067 {
03068         if(model_ptr_ptr != NULL && *model_ptr_ptr != NULL)
03069         {
03070                 svm_free_model_content(*model_ptr_ptr);
03071                 free(*model_ptr_ptr);
03072                 *model_ptr_ptr = NULL;
03073         }
03074 }
03075 
03076 void svm_destroy_param(svm_parameter* param)
03077 {
03078         free(param->weight_label);
03079         free(param->weight);
03080 }
03081 
03082 const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
03083 {
03084         // svm_type
03085 
03086         int svm_type = param->svm_type;
03087         if(svm_type != C_SVC &&
03088            svm_type != NU_SVC &&
03089            svm_type != ONE_CLASS &&
03090            svm_type != EPSILON_SVR &&
03091            svm_type != NU_SVR)
03092                 return "unknown svm type";
03093         
03094         // kernel_type, degree
03095         
03096         int kernel_type = param->kernel_type;
03097         if(kernel_type != LINEAR &&
03098            kernel_type != POLY &&
03099            kernel_type != RBF &&
03100            kernel_type != SIGMOID &&
03101            kernel_type != PRECOMPUTED)
03102                 return "unknown kernel type";
03103 
03104         if(param->gamma < 0)
03105                 return "gamma < 0";
03106 
03107         if(param->degree < 0)
03108                 return "degree of polynomial kernel < 0";
03109 
03110         // cache_size,eps,C,nu,p,shrinking
03111 
03112         if(param->cache_size <= 0)
03113                 return "cache_size <= 0";
03114 
03115         if(param->eps <= 0)
03116                 return "eps <= 0";
03117 
03118         if(svm_type == C_SVC ||
03119            svm_type == EPSILON_SVR ||
03120            svm_type == NU_SVR)
03121                 if(param->C <= 0)
03122                         return "C <= 0";
03123 
03124         if(svm_type == NU_SVC ||
03125            svm_type == ONE_CLASS ||
03126            svm_type == NU_SVR)
03127                 if(param->nu <= 0 || param->nu > 1)
03128                         return "nu <= 0 or nu > 1";
03129 
03130         if(svm_type == EPSILON_SVR)
03131                 if(param->p < 0)
03132                         return "p < 0";
03133 
03134         if(param->shrinking != 0 &&
03135            param->shrinking != 1)
03136                 return "shrinking != 0 and shrinking != 1";
03137 
03138         if(param->probability != 0 &&
03139            param->probability != 1)
03140                 return "probability != 0 and probability != 1";
03141 
03142         if(param->probability == 1 &&
03143            svm_type == ONE_CLASS)
03144                 return "one-class SVM probability output not supported yet";
03145 
03146 
03147         // check whether nu-svc is feasible
03148         
03149         if(svm_type == NU_SVC)
03150         {
03151                 int l = prob->l;
03152                 int max_nr_class = 16;
03153                 int nr_class = 0;
03154                 int *label = Malloc(int,max_nr_class);
03155                 int *count = Malloc(int,max_nr_class);
03156 
03157                 int i;
03158                 for(i=0;i<l;i++)
03159                 {
03160                         int this_label = (int)prob->y[i];
03161                         int j;
03162                         for(j=0;j<nr_class;j++)
03163                                 if(this_label == label[j])
03164                                 {
03165                                         ++count[j];
03166                                         break;
03167                                 }
03168                         if(j == nr_class)
03169                         {
03170                                 if(nr_class == max_nr_class)
03171                                 {
03172                                         max_nr_class *= 2;
03173                                         label = (int *)realloc(label,max_nr_class*sizeof(int));
03174                                         count = (int *)realloc(count,max_nr_class*sizeof(int));
03175                                 }
03176                                 label[nr_class] = this_label;
03177                                 count[nr_class] = 1;
03178                                 ++nr_class;
03179                         }
03180                 }
03181         
03182                 for(i=0;i<nr_class;i++)
03183                 {
03184                         int n1 = count[i];
03185                         for(int j=i+1;j<nr_class;j++)
03186                         {
03187                                 int n2 = count[j];
03188                                 if(param->nu*(n1+n2)/2 > min(n1,n2))
03189                                 {
03190                                         free(label);
03191                                         free(count);
03192                                         return "specified nu is infeasible";
03193                                 }
03194                         }
03195                 }
03196                 free(label);
03197                 free(count);
03198         }
03199 
03200         return NULL;
03201 }
03202 
03203 int svm_check_probability_model(const svm_model *model)
03204 {
03205         return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
03206                 model->probA!=NULL && model->probB!=NULL) ||
03207                 ((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
03208                  model->probA!=NULL);
03209 }
03210 
03211 void svm_set_print_string_function(void (*print_func)(const char *))
03212 {
03213         if(print_func == NULL)
03214                 svm_print_string = &print_string_stdout;
03215         else
03216                 svm_print_string = print_func;
03217 }
03218 
03219 }; // namespace

Generated on 31 Aug 2016 for Hugintrunk by  doxygen 1.4.7