##### ##### Link Propagation.R (ver. 1.0 (090402)) ##### ##### Supplementary codes for ##### "Simultaneous Prediction of Biological Networks of Multiple Species from Genome-wide Data and Evolutionary Informationh by Kashima et al. ##### ##### Copyright(c)2009 Yoshihiro Yamanishi. All rights reservied. ##### ##### LICENSING TERMS: ##### This program is free of charge for research and education purposes. ##### However you must obtain a license from the author to use it for commercial purposes. ##### The software must not be distributed without prior permission of the author. ##### ##### NO WARRANTY: ##### This software is supplied "as is" without warranty of any kind, ##### and its authors disclaim any and all warranties, ##### including but not limited to any implied warranties of merchantability and fitness for a particular purpose, ##### and any warranties or non infringement. ##### The user assumes all liability and responsibility for use of this software ##### and in no event shall its authors be liable for damages of any kind resulting from its use. ##### ################################################### ##### sample script for using LinkPropagation ################################################### # sample script for cross validation sampleCV <- function(){ # parameters numberOfFolds <- 5 proportionOfTrainingData <- 0.75 simultaneous <- TRUE networkFileNameList <- list("admat_cel.txt","admat_hpy.txt","admat_sce.txt") similarityFileNameList <- list("Kexp_cel_sig2.txt", "swmat_cel_hpy.txt", "swmat_cel_sce.txt", "Kexp_hpy_sig2.txt", "swmat_hpy_sce.txt", "Kexp_sce_sig2.txt") # results AUCtotal <- rep(0,numberOfFolds) AUCforEachNet <- matrix(0, numberOfFolds, 3) time <- rep(0,numberOfFolds) for(fold in 1:numberOfFolds){ result <- LP(networkFileNameList, similarityFileNameList, proportionOfTrainingData=proportionOfTrainingData, fold=fold, simultaneous=simultaneous) AUCtotal[fold] <- result[[1]] AUCforEachNet[fold,] <- result[[2]] time[fold] <- result[[3]] } # show result summary for(i in 1:3){ cat("AUC for the",i,"-th net =",mean(AUCforEachNet[,i]),"+-",sd(AUCforEachNet[,i]),"\n") } cat("total AUC =",mean(AUCtotal),"+-",sd(AUCtotal),"\n") cat("time =",mean(time),"\n") } # sample script for actual application sample <- function(){ # parameters proportionOfTrainingData <- 1 simultaneous <- TRUE predictedNetwork <- as.list(NULL) networkFileNameList <- list("admat_cel.txt","admat_hpy.txt","admat_sce.txt") similarityFileNameList <- list("Kexp_cel_sig2.txt", "swmat_cel_hpy.txt", "swmat_cel_sce.txt", "Kexp_hpy_sig2.txt", "swmat_hpy_sce.txt", "Kexp_sce_sig2.txt") # results predictedNetwork <- LP(networkFileNameList, similarityFileNameList, proportionOfTrainingData=proportionOfTrainingData, fold=fold, simultaneous=simultaneous) return(predictedNetwork) } ################################################### ##### Main routine of Link Propagation ################################################### LP <- function(networkFileNameList, similarityFileNameList, proportionOfTrainingData, sigma=0.001, fold, epsilon=0.00001, simultaneous=TRUE){ # set fold if(proportionOfTrainingData<1){ set.seed(fold) cat("fold:",fold,"\n") } # read network matrices cat("reading data...\n") nnet <- length(networkFileNameList) nnodes <- NULL A <- as.list(NULL) for(i in 1:nnet){ A[[i]] <- as.matrix( read.table(networkFileNameList[[i]], header=TRUE) ) diag(A[[i]]) <- 0 nnodes[i] <- ncol(A[[i]]) # number of nodes in each network } getNetIndex <- function(i,j){ return( (i-1)*nnet + j ) } # read similarity matrices W <- as.list(NULL) index <- 0 index_ <- 0 for(i in 1:nnet){ for(j in 1:nnet){ index <- index + 1 if(i>j){ W[[index]] <- t(W[[getNetIndex(j,i)]]) }else{ index_ <- index_ + 1 k <- index %% nnet W[[index]] <- as.matrix( read.table(similarityFileNameList[[index_]], header=TRUE) ) if( (k!=i)&&(simultaneous==FALSE) ){ if(i!=j){ W[[index]][] <- 0} } W[[index]] <- W[[index]]^2 # polynomial kernel of degree 2 } } } numberOfTestData <- NULL numberOfTrainingData <- NULL numberOfTrainingPositiveData <- NULL numberOfTrainingNegativeData <- NULL numberOfTotalTestData <- 0 numberOfTotalTrainingPositiveData <- 0 numberOfTotalTrainingNegativeData <- 0 # preparing cat("preparing training data...\n") trainingIndex <- as.list(NULL) for(k in 1:nnet){ numberOfTestData[k] <- 0 numberOfTrainingPositiveData[k] <- 0 numberOfTrainingNegativeData[k] <- 0 if(proportionOfTrainingData==1){ trainingIndex[[k]] <- !(A[[k]][]==0) numberOfTrainingPositiveData[k] <- length(A[[k]][A[[k]][]>0])/2 numberOfTrainingNegativeData[k] <- length(A[[k]][A[[k]][]<0])/2 numberOfTestData[k] <- (length(A[[k]][A[[k]][]==0])-nnodes[k])/2 print(numberOfTestData[k]) }else{ trainingIndex[[k]] <- matrix(TRUE, nnodes[k], nnodes[k]) for( i in 1:(nnodes[k]-1) ){ for( j in (i+1):nnodes[k] ){ if( (A[[k]][i,j] != 0)&&(runif(1) > proportionOfTrainingData) ){ numberOfTestData[k] <- numberOfTestData[k] + 1 trainingIndex[[k]][i,j] <- FALSE trainingIndex[[k]][j,i] <- FALSE } else{ if( A[[k]][i,j] == 1 ){ numberOfTrainingPositiveData[k] <- numberOfTrainingPositiveData[k] + 1 }else{ numberOfTrainingNegativeData[k] <- numberOfTrainingNegativeData[k] + 1 } } } } } numberOfTrainingData[k] <- numberOfTrainingPositiveData[k] + numberOfTrainingNegativeData[k] numberOfTotalTestData <- numberOfTotalTestData + numberOfTestData[k] numberOfTotalTrainingPositiveData <- numberOfTotalTrainingPositiveData + numberOfTrainingPositiveData[k] numberOfTotalTrainingNegativeData <- numberOfTotalTrainingNegativeData + numberOfTrainingNegativeData[k] } numberOfTotalTrainingData <- numberOfTotalTrainingPositiveData + numberOfTotalTrainingNegativeData weightForPositives <- numberOfTotalTrainingData / numberOfTotalTrainingPositiveData weightForNegatives <- numberOfTotalTrainingData / numberOfTotalTrainingNegativeData print(weightForPositives) print(weightForNegatives) # prepare F, Fstar, R, P, and Q F <- as.list(NULL) Fstar <- as.list(NULL) R <- as.list(NULL) P <- as.list(NULL) Q <- as.list(NULL) for(k in 1:nnet){ F[[k]] <- trainingIndex[[k]] * A[[k]] F[[k]][F[[k]]>0] <- F[[k]][F[[k]]>0] * numberOfTrainingData[k]/numberOfTrainingPositiveData[k] F[[k]][F[[k]]<0] <- F[[k]][F[[k]]<0] * numberOfTrainingData[k]/numberOfTrainingNegativeData[k] diag(F[[k]]) <- 0 Fstar[[k]] <- F[[k]] R[[k]] <- matrix(0, nnodes[k], nnodes[k]) P[[k]] <- matrix(0, nnodes[k], nnodes[k]) Q[[k]] <- matrix(0, nnodes[k], nnodes[k]) } ## TRAINING cat("training...\n") startTime <- unclass(Sys.time()) # definition of D D <- as.list(NULL) index <- 0 for(k in 1:nnet){ for(p in 1:nnet){ index <- index + 1 D[[index]] <- rowSums(W[[index]]) } } # conjugate gradient temp <- as.list(NULL) index <- 0 for(k in 1:nnet){ temp[[k]] <- 0 for(p in 1:nnet){ index <- index + 1 temp[[k]] <- temp[[k]] + ( D[[index]] * t(D[[index]] * F[[k]]) ) - ( W[[index]] %*% F[[p]] %*% t(W[[index]])) } } R <- multiplyScalar(temp, -sigma) Rnorm0 <- innerProduct(R,R) P <- R while(TRUE){ temp <- as.list(NULL) index <- 0 for(k in 1:nnet){ temp[[k]] <- 0 for(p in 1:nnet){ index <- index + 1 temp[[k]] <- temp[[k]] + ( D[[index]] * t(D[[index]] * P[[k]]) ) - ( W[[index]] %*% P[[p]] %*% t(W[[index]])) } } Q <- sumMatrixArray( multiplyScalar(temp, sigma), P) alpha <- innerProduct(R,P)/innerProduct(P,Q) F <- sumMatrixArray(F, multiplyScalar(P, alpha)) RnormOLD <- innerProduct(R,R) R <- sumMatrixArray(R, multiplyScalar(Q, -alpha)) RnormNEW <- innerProduct(R,R) beta <- RnormNEW/RnormOLD cat("ratio = ", (RnormNEW/Rnorm0), "\n") if( RnormNEW < (Rnorm0 * epsilon^2)){break} P <- sumMatrixArray( R, multiplyScalar(P, beta) ) } endTime <- unclass(Sys.time()) if(proportionOfTrainingData==1){ return(F) }else{ # AUC cat("calculating AUC...\n") AUCforEachNet <- NULL aucmatTotal <- matrix(0, numberOfTotalTestData, 2) aucmatTotalIndex <- 0 for(k in 1:nnet){ cat("preparing AUC matrix for ",k,"-th net \n") aucmat <- matrix(0, numberOfTestData[[k]], 2) aucmatIndex <- 0 for( i in 1:(nnodes[k]-1) ){ for( j in (i+1):nnodes[k] ){ if( trainingIndex[[k]][i,j] == FALSE){ aucmatIndex <- aucmatIndex + 1 aucmatTotalIndex <- aucmatTotalIndex + 1 aucmat[aucmatIndex,1] <- F[[k]][i,j] aucmat[aucmatIndex,2] <- (A[[k]][i,j]+1)/2 aucmatTotal[aucmatTotalIndex,1] <- F[[k]][i,j] aucmatTotal[aucmatTotalIndex,2] <- (A[[k]][i,j]+1)/2 } } } AUCforEachNet[k] <- calculateAUC(aucmat) cat("AUC for ",k,"-th net: ",AUCforEachNet[k],"\n") } AUCtotal <- calculateAUC(aucmatTotal) cat("AUC total: ",AUCtotal,"\n") cat("time: ",endTime-startTime,"\n") return(list(AUCtotal, AUCforEachNet,(endTime-startTime))) } } ################################################### ##### Utility functions ################################################### multiplyScalar <- function(F, a){ for(i in 1:length(F)){ F[[i]] <- F[[i]] * a } return(F) } innerProduct <- function(A, B){ value <- 0 if(length(A)==length(B)){ for(i in 1:length(A)){ value <- value + sum(A[[i]]*B[[i]]) } }else{ print("lengths are not the same") } return(value) } sumMatrixArray <- function(A, B){ if(length(A)==length(B)){ for(i in 1:length(A)){ A[[i]] <- A[[i]] + B[[i]] } }else{ print("lengths are not the same") } return(A) } productMatrixArray <- function(A, B){ if(length(A)==length(B)){ for(i in 1:length(A)){ A[[i]] <- A[[i]] * B[[i]] } }else{ print("lengths are not the same") } return(A) } calculateAUC <- function(aucmat){ AUC <-0 TP <- 0 aucmat <- aucmat[order(aucmat[,1]),] for(i in length(aucmat[,1]):1){ if( aucmat[i,2] == 1){ TP <- TP + 1 } else{ AUC <- AUC + TP } } AUC <- AUC / ( sum(aucmat[,2]) * ( length(aucmat[,2]) - sum(aucmat[,2]) ) ) return(AUC) }