# Course "Kernel methods"
# Homepage : http://www.bioinfo.ensmp.fr/~vert/teaching/2007master
# Copyright 2007 Jean-Philippe Vert
#
# Homework 2
# Support vector machines
#
# This time we do it in R (http://www.r-project.org/).
# We use the package svmpath, which computes the SVM solution for all
# values of the regularization parameter simultaneously.

# Load the svmpath package. Make sure it is installed on your machine,
# otherwise install it with install.packages('svmpath').
# library() (unlike require()) stops immediately if the package is missing,
# instead of failing later at the first call to svmpath().
library(svmpath)

# Pause before each new plot so that every figure can be inspected.
par(ask = TRUE)

# Load the data
x <- read.table("data.txt", sep = ",")
y <- read.table("labels.txt")

# Warning: y is 0/1 valued, but SVMpath requires -1/1 values
y <- 2 * as.numeric(y[[1]]) - 1

# Warning: x is badly scaled (try plot(sapply(x, sd))) so we should scale it
# before training.
xscale <- scale(x, center = TRUE, scale = TRUE)

# The dataset is too large for a true cross-validation, so we just randomly
# train on a subset
n <- nrow(xscale)                  # number of points
trainsize <- 100                   # size of the training set
niter <- 10                        # number of times we repeat the train/test loop
lambdas <- 1.5^(-20 + seq(50))     # values of lambda to be tested

# The test set is everything outside the training sample; it must not be empty.
stopifnot(trainsize < n)

# Repeat `niter` random train/test splits and measure, for every lambda, the
# classification accuracy on the training and on the test part.
#
# Arguments:
#   xscale, y  : scaled data matrix and the -1/1 labels (one per row of xscale)
#   trainsize  : number of points drawn (without replacement) for training
#   niter      : number of random splits
#   lambdas    : regularization values at which the path is evaluated
#   ...        : forwarded to svmpath() (e.g. kernel.function, param.kernel)
#
# Returns a list with two length(lambdas) x niter matrices, `train` and `test`.
run_trials <- function(xscale, y, trainsize, niter, lambdas, ...) {
  n <- nrow(xscale)
  acc_train <- matrix(0, length(lambdas), niter)
  acc_test <- matrix(0, length(lambdas), niter)

  for (i in seq_len(niter)) {
    print(paste("Iteration", i, ".."))

    # Prepare the training and test sets
    traini <- sample(seq_len(n), trainsize)
    xtrain <- xscale[traini, , drop = FALSE]
    ytrain <- y[traini]
    xtest <- xscale[-traini, , drop = FALSE]
    ytest <- y[-traini]

    # Train the SVM (whole regularization path at once)
    s <- svmpath(xtrain, ytrain, ...)

    # Predict: one column of -1/1 predictions per value of lambda
    ypredtrain <- predict(s, xtrain, lambdas, type = "class")
    ypredtest <- predict(s, xtest, lambdas, type = "class")

    # Evaluate the prediction. label * prediction is +1 when correct and -1
    # when wrong, so (mean + 1) / 2 is the fraction of correct predictions.
    acc_train[, i] <- (colMeans(ytrain * ypredtrain) + 1) / 2
    acc_test[, i] <- (colMeans(ytest * ypredtest) + 1) / 2
  }

  list(train = acc_train, test = acc_test)
}

# Plot the accuracy, averaged over the random splits, against lambda
# (log-scaled x axis) for the training set (colour 1) and test set (colour 2).
plot_accuracy <- function(lambdas, acc_train, acc_test, main) {
  mean_train <- rowMeans(acc_train)
  mean_test <- rowMeans(acc_test)

  # Size the y axis from both curves; otherwise the test curve, added
  # afterwards with lines(), can fall outside the plotting region.
  plot(lambdas, mean_train, type = "l", log = "x",
       xlab = "Lambda", ylab = "Accuracy", col = 1,
       ylim = range(mean_train, mean_test, finite = TRUE))
  lines(lambdas, mean_test, col = 2)
  title(main)
  legend("topright", legend = c("Train", "Test"), col = seq(2), lty = 1)
  grid()
}

# Linear kernel (the default kernel of svmpath)
print("Now working with the linear kernel")
acc <- run_trials(xscale, y, trainsize, niter, lambdas)
accuracytrain <- acc$train
accuracytest <- acc$test
plot_accuracy(lambdas, accuracytrain, accuracytest, "Linear kernel")

# Gaussian kernel, for several values of the kernel parameter p
for (p in c(0.01, 0.02, 0.05, 0.1, 0.2, 0.5)) {
  print(paste("Now working with the Gaussian kernel, p=", p))
  acc <- run_trials(xscale, y, trainsize, niter, lambdas,
                    kernel.function = radial.kernel, param.kernel = p)
  accuracytrain <- acc$train
  accuracytest <- acc$test
  plot_accuracy(lambdas, accuracytrain, accuracytest,
                paste("Gaussian kernel, p=", p))
}