/* Written by Jean-Philippe Vert on July 3rd, 2001. * Permanent e-mail : Jean-Philippe.Vert@mines.org * * This file contains a set of fonctions to implement a support vector machine with the kernels defined in the paper "Support vector machine prediction of signal peptide cleavage site using a new class of kernels for strings" (Proceedings of the Pacific Symposium on Biocomputing 2001). * It contains a set of functions to replace the default fonction in the file 'kernel.cpp' of the mySVM software source code from Stefan Ruping (available at http://www-ai.cs.uni-dortmund.de/SOFTWARE/MYSVM ). * * USAGE : copy the following functions and paste them in the file 'kernel.cpp' from the mySVM source code, then compile mySVM. * * More informations can be found from the author's homepage: * http://web.kuicr.kyoto-u.ac.jp/~vert */ // Start copy here /* * * kernel_user_c: Enter your own kernel code here * */ kernel_user_c::kernel_user_c(){ param_i_1 = 0; param_i_2 = 0; param_i_3 = 0; param_i_4 = 0; param_i_5 = 0; param_f_1 = 0; param_f_2 = 1; param_f_3 = 0; param_f_4 = 0; param_f_5 = 0; }; /* The following function compute the P-kernel between two examples based on an independant probabilistic model. * Both examples are supposed to be strings of integers with the same length (corresponding for instance to sequences of amino-acid, where each amino-acid is mapped to an integer). * * Each example is supposed to coded in the following way: * I1 L1 I2 L2 ... In Ln * where I1, I2, .., In is the sequence of integers and for each i=1,..,n, Li is the logarithm of the probability of the integer Ii at the i-th position. * * The logarithm of the kernel between two examples x and y coded as * Ix1 Lx1 Ix2 Lx2 ... Ixn Lxn * and * Iy1 Ly1 Iy2 Ly2 ... Iyn Lyn * can be found by the formula: * log K(x,y) = Q1 + Q2 + ... + Q_n * where Qi for i=1,..,n is given by: * Qi = Lxi + Lyi , if Ixi <> Iyi , * Qi = Lxi + log ( 1 + exp( Lxi ) ) , if Ixi = Iyi. * */ SVMFLOAT kernel_user_c::calculate_K(const svm_example x, const svm_example y){ SVMFLOAT result=0; // Read the two examples together with their length svm_attrib* att_x = x.example; svm_attrib* att_y = y.example; svm_attrib* length_x = &(att_x[x.length]); svm_attrib* length_y = &(att_y[y.length]); // Two examples should have the same length if (x.length != y.length) { cout<<"Error in the example file "<att<<" "<att<<"\n"; return 0; }; // Read the sequences one character at a time while((att_x < length_x) && (att_y < length_y)){ // 'result' contains the logarithm of the kernel, which is computed incrementally if(att_x->att == att_y->att){ // If both examples have the same character at the current position: result += (att_x+1)->att + log(1+exp((att_x+1)->att)); } else{ // Otherwise: result += (att_x+1)->att + (att_y+1)->att; }; // move to the next character att_x+=2; att_y+=2; }; // Rescale the result by the formula: // K(x,y) = exp( param_f_2 * ( result + param_f_1 ) ) result += param_f_1; result *= param_f_2; result = exp(result); return result; }; void kernel_user_c::input(istream& data_stream){ // read comments and parameters until next @ // WARNING: no checks of the input values are performed char next = data_stream.peek(); char* s = new char[MAXCHAR]; while((! data_stream.eof()) && (next != '@')){ if('#' == next){ // ignore comment data_stream.getline(s,MAXCHAR); } else if(('\n' == next) || ('\t' == next) || ('\r' == next) || ('\f' == next) || (' ' == next)){ // ignore line-end next = data_stream.get(); } else{ // trying to read in parameter data_stream >> s; if(0 == strcmp("param_i_1",s)){ data_stream >> param_i_1; } else if(0 == strcmp("param_i_2",s)){ data_stream >> param_i_2; } else if(0 == strcmp("param_i_3",s)){ data_stream >> param_i_3; } else if(0 == strcmp("param_i_4",s)){ data_stream >> param_i_4; } else if(0 == strcmp("param_i_5",s)){ data_stream >> param_i_5; } else if(0 == strcmp("param_f_1",s)){ data_stream >> param_f_1; } else if(0 == strcmp("param_f_2",s)){ data_stream >> param_f_2; } else if(0 == strcmp("param_f_3",s)){ data_stream >> param_f_3; } else if(0 == strcmp("param_f_4",s)){ data_stream >> param_f_4; } else if(0 == strcmp("param_f_5",s)){ data_stream >> param_f_5; } else{ cout<<"Ignoring unknown parameter: "<