##### ##### SE_scripts.txt (ver. 1.0 (100531)) ##### ##### Supplementary codes for ##### "Predicting drug side-effects: a chemical fragment-based approach" ##### by Pauwels, E., Stoven,V., and Yamanishi, Y. ##### ##### Copyright(c)2010 Edouard Pauwels. All rights reservied. ##### ##### LICENSING TERMS: ##### This program is free of charge for research and education purposes. ##### However you must obtain a license from the author to use it for commercial purposes. ##### The software must not be distributed without prior permission of the author. ##### ##### NO WARRANTY: ##### This software is supplied "as is" without warranty of any kind, ##### and its authors disclaim any and all warranties, ##### including but not limited to any implied warranties of merchantability and fitness for a particular purpose, ##### and any warranties or non infringement. ##### The user assumes all liability and responsibility for use of this software ##### and in no event shall its authors be liable for damages of any kind resulting from its use. ##### ### Define functions ### AUC_score <- function(scores,Zp){ #### Given a score profile and a binary positive/negative matrix/vector/row, #### the function calculates the area under the ROC curve (AUC) for 20 thresholds. tpositive <- which(Zp==1) tnegative <- which(Zp==0) a <- scores thrs <- sort(c(0,quantile(a[which(a!=0)],(0:20)/20))) score <- matrix(0,2,length(thrs)) for (step in 1:length(thrs)){ thresh <- thrs[step] ppositive <- which(scores>=thresh) score[1,step] <- length(intersect(ppositive,tpositive))/length(tpositive) score[2,step] <- length(intersect(ppositive,tnegative))/length(tnegative) } b <- 0 for (l in 2:length(thrs)){ print(b) b <- b+(score[1,l]+score[1,l-1])*abs(score[2,l]-score[2,l-1])/2 } res <- list() res$b <- b res$score <- score res } Scores_component <- function(Xps,Zps,Zts,U,V,cors,d,mean,sd){ ### Computes three different scores profiles given SCCA results(U,V,cors,d,mean,sd), training and test profile ### matrix(Xps,Zts) and training data(mean, sd), Zps is not used ### ndrug <- dim(Xps)[1] nword <- dim(Zps)[2] nK <- dim(U)[2] a <- Xps %*% U ### Score 1 based on singular values score_1 <- a %*% diag(d) %*% t(V) %*% diag(sd) * matrix(sd, nrow(Xps), ncol(Zps), byrow=T) + matrix(mean, nrow(Xps), ncol(Zps), byrow=T) ### Score 2 based on correlation values score_2 <- a %*% diag(cors) %*% t(V) * matrix(sd, nrow(Xps), ncol(Zps), byrow=T) + matrix(mean, nrow(Xps), ncol(Zps), byrow=T) b <- svd(V) b$d[b$d < 0.05] <- 0 for (i in 1:length(b$d)){ if (b$d[i]!=0){ b$d[i] <- 1/b$d[i] } } ### Score_3 based on pseudo inverse. score_3 <- a %*% b$v %*% diag(b$d) %*% t(b$u) * matrix(sd, nrow(Xps), ncol(Zps), byrow=T) + matrix(mean, nrow(Xps), ncol(Zps), byrow=T) res <- list() res$score_1 <- score_1 res$score_2 <- score_2 res$score_3 <- score_3 res } arrange_rows <- function(Z,Z_score_1,Z_score_2,Z_score_3){ ### Sorts rows of score matrixes the same way as Z ones according to row names ### score_names <- rownames(Z_score_1) training_names <- rownames(Z) out_1 <- c() out_2 <- c() out_3 <- c() for (i in 1:length(training_names)){ name <- training_names[i] place <- score_names==name out_1 <- rbind(out_1,Z_score_1[place,]) out_2 <- rbind(out_2,Z_score_2[place,]) out_3 <- rbind(out_3,Z_score_3[place,]) } res <- list() res$s1 <- out_1 res$s2 <- out_2 res$s3 <- out_3 res } arrange_rows_single <- function(Z,Z_score){ ### Sorts rows of score matrix the same way as Z ones according to row names ### score_names <- rownames(Z_score) training_names <- rownames(Z) out_1 <- c() for (i in 1:length(training_names)){ name <- training_names[i] place <- score_names==name out_1 <- rbind(out_1,Z_score[place,]) } res <- out_1 res } ### Functions from PMA package ### CheckVs <- function(v,x,z,K){ # If v is NULL, then get v as appropriate. if(!is.null(v) && !is.matrix(v)) v <- matrix(v,nrow=ncol(z)) if(!is.null(v) && ncol(v)K) v <- matrix(v[,1:K],ncol=K) if(is.null(v) && ncol(z)>nrow(z) && ncol(x)>nrow(x)){ v <- matrix(fastsvd(x,z)$v[,1:K],ncol=K) } else if (is.null(v) && (ncol(z)<=nrow(z) || ncol(x)<=nrow(x))){ v <- matrix(svd(t(x)%*%z)$v[,1:K],ncol=K) } return(v) } fastsvd <- function(x,z){ # fast svd of t(x)%*%z, where ncol(x)>>nrow(x) and same for z xx=x%*%t(x) xx2=msqrt(xx) y=t(z)%*%xx2 a=svd(y) v=a$u d=a$d zz=z%*%t(z) zz2=msqrt(zz) y=t(x)%*%zz2 a=svd(y) u=a$u return(list(u=u,v=v,d=d)) } msqrt <- function(x){ eigenx <- eigen(x) return(eigenx$vectors%*%diag(sqrt(pmax(0,eigenx$values)))%*%t(eigenx$vectors)) } library(PMA) ### Load dataset ### Z <- as.matrix(read.delim("./Z.txt")) X <- as.matrix(read.delim("./X.txt")) Xtest <- as.matrix(read.delim("Test_set_fingerprints.txt")) ### Standadridisation requires to delete null columns which values are all the same (no information) colsumvec.x <- apply(X,2,sum) X <- X[,colsumvec.x>0] Xtest <- Xtest[,colsumvec.x>0] colsumvec.z <- apply(Z,2,sum) Z <- Z[,colsumvec.z>0] colsumvec.z <- apply(Z,2,sum) colsumvec.x <- apply(X,2,sum) ### Parameters to build cross validation sets ### n <- nrow(X); px <- ncol(X); pz <- ncol(Z); nabs <- n rsize <- trunc(n / 5) index <- 1:n # Create 5 training and test sets based on the entire dataset for (r in 1:5){ #Delete ids from the index list rindex <- sample(index, size=trunc(n/(6 - r))) pindex.r <- rindex tindex.r <- (1:nabs)[-pindex.r] # Build training and test sets Xt <- X[tindex.r, ] Zt <- Z[tindex.r, ] Xp <- X[pindex.r, ] Zp <- Z[pindex.r, ] #Update index and number of ids left index <- index[!index%in%pindex.r] n <- length(index) # normatlization for X and Z based on training mean and sd xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean) xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd) xsd[xsd==0] <- 1; zsd[zsd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) Xts <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Zts <- Zt - matrix(zmean, nrow(Zt), ncol(Zt), byrow=T) Xps <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) Zps <- Zp - matrix(zmean, nrow(Zp), ncol(Zp), byrow=T) Xts <- Xts * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Zts <- Zts * matrix(1/zsd, nrow(Zt), ncol(Zt), byrow=T) Xps <- Xps * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) Zps <- Zps * matrix(1/zsd, nrow(Zp), ncol(Zp), byrow=T) # Save the results write.table(round(Xt,3), file=paste("./Data/Xt",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) write.table(round(Xp,3), file=paste("./Data/Xp",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) write.table(round(Zp,3), file=paste("./Data/Zp",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) write.table(round(Zt,3), file=paste("./Data/Zt",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) write.table(round(Xts,3), file=paste("./Data/Xts",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) write.table(round(Xps,3), file=paste("./Data/Xps",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) write.table(round(Zps,3), file=paste("./Data/Zps",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) write.table(round(Zts,3), file=paste("./Data/Zts",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T) } ### Compute cross-validation, we investigates the 3 proposed scores for various sparsity parameter values (m=80) ### xxxcvmatcc<- matrix(0,nrow=3,ncol=19,dimnames=list(c('Score 1','Score 2','Score 3'),c('0.01','0.02','0.03','0.04','0.05','0.06','0.07','0.08','0.09','0.1','0.2','0.3','0.4','0.5','0.6','0.7','0.8','0.9','1'))) for (xxxi in 1:19){ if (xxxi<10){ xxxc <- 0.01 * xxxi } if (xxxi>9){ xxxc <- 0.1 * (xxxi - 9) } prediction_1 <- c() prediction_2 <- c() prediction_3 <- c() for (xxxr in 1:5){# For each of the five training sets # Load the training and test set Xt <- as.matrix(read.delim(paste("./Data/Xt",xxxr,".txt",sep=""))) Zt <- as.matrix(read.delim(paste("./Data/Zt",xxxr,".txt",sep=""))) Xp <- as.matrix(read.delim(paste("./Data/Xp",xxxr,".txt",sep=""))) Zp <- as.matrix(read.delim(paste("./Data/Zp",xxxr,".txt",sep=""))) Xts <- as.matrix(read.delim(paste("./Data/Xts",xxxr,".txt",sep=""))) Zts <- as.matrix(read.delim(paste("./Data/Zts",xxxr,".txt",sep=""))) Xps <- as.matrix(read.delim(paste("./Data/Xps",xxxr,".txt",sep=""))) Zps <- as.matrix(read.delim(paste("./Data/Zps",xxxr,".txt",sep=""))) zfreq <- apply(Zt,2,sum) # Calculate training mean and sd xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean) xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd) xsd[xsd==0] <- 1; zsd[zsd==0] <- 1 # Run SCCA result_s.sca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=xxxc, penaltyz=xxxc, niter=15, K=80, trace=T, standardize=F) # Gather prediction scores profiles prediction_1 <- rbind(prediction_1,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_1, nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2 <- rbind(prediction_2,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_3 <- rbind(prediction_3,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_3 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) } # Re order score profiles prediction <- arrange_rows(Z,prediction_1,prediction_2,prediction_3,prediction_4,prediction_5) # Compute AUC xxxcvmatcc[1,xxxi] <- AUC_score(prediction$s1,Z)$b xxxcvmatcc[2,xxxi] <- AUC_score(prediction$s2,Z)$b xxxcvmatcc[3,xxxi] <- AUC_score(prediction$s3,Z)$b # Save the results write.table(round(xxxcvmatcc,4), file="./xxxcvmatcc_scores.txt", quote=F, sep="\t", row.names=T, col.names=T) } ### Results ### round(xxxcvmatcc,4) #0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 #Score 1 0.8884 0.8884 0.8805 0.8706 0.8379 0.7966 0.7406 0.6363 0.5755 0.5569 0.604 0.6148 0.6162 0.6188 0.6195 0.6194 0.6194 0.6194 0.6194 #Score 2 0.8938 0.8938 0.8939 0.894 0.8939 0.8936 0.8922 0.8905 0.8881 0.8836 0.8037 0.8092 0.8404 0.8542 0.857 0.8584 0.8563 0.8563 0.8563 #Score 3 0.8917 0.8917 0.8914 0.8912 0.8869 0.8855 0.8805 0.8673 0.8569 0.8432 0.7229 0.7213 0.7266 0.7465 0.7654 0.796 0.799 0.799 0.799 ### Compute cross-validation, we select score 2. We investigate different values for c(sparsity) and d(number of components) parameters ### xxxcvmatcc<- matrix(0,nrow=10,ncol=10,dimnames=list(c('10','20','30','40','50','60','70','80','90','100'),c('0.01','0.02','0.03','0.04','0.05','0.06','0.07','0.08','0.09','0.1'))) for (xxxi in 1:10){ if (xxxi<10){ xxxc <- 0.01 * xxxi } if (xxxi>9){ xxxc <- 0.1 * (xxxi - 9) } # Initialise prediction scores profiles prediction_2_10 <- c() prediction_2_20 <- c() prediction_2_30 <- c() prediction_2_40 <- c() prediction_2_50 <- c() prediction_2_60 <- c() prediction_2_70 <- c() prediction_2_80 <- c() prediction_2_90 <- c() prediction_2_100 <- c() for (xxxr in 1:5){# For each of the five training sets # Load the training and test set Xt <- as.matrix(read.delim(paste("./Data/Xt",xxxr,".txt",sep=""))) Zt <- as.matrix(read.delim(paste("./Data/Zt",xxxr,".txt",sep=""))) Xp <- as.matrix(read.delim(paste("./Data/Xp",xxxr,".txt",sep=""))) Zp <- as.matrix(read.delim(paste("./Data/Zp",xxxr,".txt",sep=""))) Xts <- as.matrix(read.delim(paste("./Data/Xts",xxxr,".txt",sep=""))) Zts <- as.matrix(read.delim(paste("./Data/Zts",xxxr,".txt",sep=""))) Xps <- as.matrix(read.delim(paste("./Data/Xps",xxxr,".txt",sep=""))) Zps <- as.matrix(read.delim(paste("./Data/Zps",xxxr,".txt",sep=""))) zfreq <- apply(Zt,2,sum) # Calculate training mean and sd xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean) xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd) xsd[xsd==0] <- 1; zsd[zsd==0] <- 1 # Run SCCA result_s.sca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=xxxc, penaltyz=xxxc, niter=15, K=100, trace=T, standardize=F) # Gather prediction scores profiles prediction_2_10 <- rbind(prediction_2_10,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:10],result_s.sca$v[,1:10] ,result_s.sca$cors[1:10],result_s.sca$d[1:10],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_20 <- rbind(prediction_2_20,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:20],result_s.sca$v[,1:20] ,result_s.sca$cors[1:20],result_s.sca$d[1:20],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_30 <- rbind(prediction_2_30,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:30],result_s.sca$v[,1:30] ,result_s.sca$cors[1:30],result_s.sca$d[1:30],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_40 <- rbind(prediction_2_40,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:40],result_s.sca$v[,1:40] ,result_s.sca$cors[1:40],result_s.sca$d[1:40],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_50 <- rbind(prediction_2_50,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:50],result_s.sca$v[,1:50] ,result_s.sca$cors[1:50],result_s.sca$d[1:50],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_60 <- rbind(prediction_2_60,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:60],result_s.sca$v[,1:60] ,result_s.sca$cors[1:60],result_s.sca$d[1:60],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_70 <- rbind(prediction_2_70,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:70],result_s.sca$v[,1:70] ,result_s.sca$cors[1:70],result_s.sca$d[1:70],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_80 <- rbind(prediction_2_80,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:80],result_s.sca$v[,1:80] ,result_s.sca$cors[1:80],result_s.sca$d[1:80],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_90 <- rbind(prediction_2_90,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:90],result_s.sca$v[,1:90] ,result_s.sca$cors[1:90],result_s.sca$d[1:90],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) prediction_2_100 <- rbind(prediction_2_100,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:100],result_s.sca$v[,1:100] ,result_s.sca$cors[1:100],result_s.sca$d[1:100],zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) } # Re order score profiles prediction_10 <- arrange_rows_single(Z,prediction_2_10) prediction_20 <- arrange_rows_single(Z,prediction_2_20) prediction_30 <- arrange_rows_single(Z,prediction_2_30) prediction_40 <- arrange_rows_single(Z,prediction_2_40) prediction_50 <- arrange_rows_single(Z,prediction_2_50) prediction_60 <- arrange_rows_single(Z,prediction_2_60) prediction_70 <- arrange_rows_single(Z,prediction_2_70) prediction_80 <- arrange_rows_single(Z,prediction_2_80) prediction_90 <- arrange_rows_single(Z,prediction_2_90) prediction_100 <- arrange_rows_single(Z,prediction_2_100) # Compute AUC xxxcvmatcc[1,xxxi] <- AUC_score(prediction_10,Z)$b xxxcvmatcc[2,xxxi] <- AUC_score(prediction_20,Z)$b xxxcvmatcc[3,xxxi] <- AUC_score(prediction_30,Z)$b xxxcvmatcc[4,xxxi] <- AUC_score(prediction_40,Z)$b xxxcvmatcc[5,xxxi] <- AUC_score(prediction_50,Z)$b xxxcvmatcc[6,xxxi] <- AUC_score(prediction_60,Z)$b xxxcvmatcc[7,xxxi] <- AUC_score(prediction_70,Z)$b xxxcvmatcc[8,xxxi] <- AUC_score(prediction_80,Z)$b xxxcvmatcc[9,xxxi] <- AUC_score(prediction_90,Z)$b xxxcvmatcc[10,xxxi] <- AUC_score(prediction_100,Z)$b # Save the results write.table(round(xxxcvmatcc,4), file="./xxxcvmatcc_scores_d_c.txt", quote=F, sep="\t", row.names=T, col.names=T) } round(xxxcvmatcc,4) #0.01 0.02 0.03 0.04 0.05 0.06 0.07 0.08 0.09 0.1 #10 0.894 0.894 0.8941 0.8942 0.8945 0.8946 0.8943 0.8945 0.8945 0.8938 #20 0.8939 0.8939 0.894 0.8942 0.8943 0.8945 0.8943 0.8943 0.8944 0.8936 #30 0.8939 0.8939 0.894 0.8942 0.8943 0.8943 0.894 0.894 0.894 0.8928 #40 0.8939 0.8939 0.894 0.8942 0.8941 0.8941 0.8939 0.8936 0.8935 0.8909 #50 0.8939 0.8939 0.894 0.8941 0.8941 0.894 0.8938 0.8933 0.8929 0.8897 #60 0.8939 0.8939 0.894 0.8942 0.894 0.8939 0.8934 0.893 0.8921 0.8886 #70 0.8938 0.8938 0.894 0.8942 0.8941 0.8938 0.8932 0.8918 0.8901 0.8862 #80 0.8938 0.8938 0.8939 0.894 0.8939 0.8936 0.8926 0.8903 0.888 0.8832 #90 0.8938 0.8938 0.8939 0.894 0.8938 0.8935 0.8921 0.8895 0.8858 0.8785 #100 0.8938 0.8938 0.8939 0.894 0.8938 0.8932 0.8918 0.8888 0.8843 0.8748 #### plot ROC curves for SCCA and OCCA cv_prediction_s <- c() cv_prediction_o <- c() for (xxxr in 1:5){# For each of the five training sets # Initialise all variables # Load the training and test set Xt <- as.matrix(read.delim(paste("./Data/Xt",xxxr,".txt",sep=""))) Zt <- as.matrix(read.delim(paste("./Data/Zt",xxxr,".txt",sep=""))) Xp <- as.matrix(read.delim(paste("./Data/Xp",xxxr,".txt",sep=""))) Zp <- as.matrix(read.delim(paste("./Data/Zp",xxxr,".txt",sep=""))) zfreq <- apply(Zt,2,sum) # normatlization for X and Y based on training mean and sd xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean) xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd) xsd[xsd==0] <- 1; zsd[zsd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) Xts <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T) Zts <- Zt - matrix(zmean, nrow(Zt), ncol(Zt), byrow=T) Xps <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T) Zps <- Zp - matrix(zmean, nrow(Zp), ncol(Zp), byrow=T) Xts <- Xts * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T) Zts <- Zts * matrix(1/zsd, nrow(Zt), ncol(Zt), byrow=T) Xps <- Xps * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T) Zps <- Zps * matrix(1/zsd, nrow(Zp), ncol(Zp), byrow=T) result_s.sca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=0.04, penaltyz=0.04, niter=15, K=80, trace=T, standardize=F) result_s.oca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=1, penaltyz=1, niter=15, K=80, trace=T, standardize=F) cv_prediction_s <- rbind(cv_prediction_s,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) cv_prediction_o <- rbind(cv_prediction_o,matrix(Scores_component(Xps,Zps,Zts,result_s.oca$u,result_s.oca$v,result_s.oca$cors,result_s.oca$d,zmean,zsd)$score_2 ,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp)))) } cv_prediction_s <- arrange_rows_single(Z,cv_prediction_s) cv_prediction_o <- arrange_rows_single(Z,cv_prediction_o) ROC_s <- AUC_score(cv_prediction_s,Z) ROC_o <- AUC_score(cv_prediction_o,Z) plot(ROC_s$score[2,], ROC_s$score[1,], pch=1,xlim=c(0,1),ylim=c(0,1),xlab="", ylab="", main=""); par(new=T) plot(ROC_o$score[2,], ROC_o$score[1,], pch=5,xlim=c(0,1),ylim=c(0,1),xlab="", ylab="", main=""); par(new=T) plot(c(0,1), c(0,1), type="n", xlab="False positive rate", ylab="True positive rate", main="ROC curves") legend(0.7, 0.6, "SCCA", pch=1) legend(0.7, 0.45, "OCCA", pch=5) ### Run SCCA ### # normatlization for X and Z xmean <- apply(X,2,mean); zmean <- apply(Z,2,mean) xsd <- apply(X,2,sd); zsd <- apply(Z,2,sd) xsd[xsd==0] <- 1; zsd[zsd==0] <- 1 #Xt <- scale(Xt); Yt <- scale(Yt) Xs <- X - matrix(xmean, nrow(X), ncol(X), byrow=T) Zs <- Z - matrix(zmean, nrow(Z), ncol(Z), byrow=T) Xs <- Xs * matrix(1/xsd, nrow(X), ncol(X), byrow=T) Zs <- Zs * matrix(1/zsd, nrow(Z), ncol(Z), byrow=T) res <- CCA(Xs,Zs,typex="standard", typez="standard", penaltyx=0.04, penaltyz=0.04, K=80, niter=30, standardize= FALSE, trace=TRUE) # Arange sign for (k in 1:80){if (max(abs(res$u[,k])) != max(res$u[,k])){res$v[,k] = - res$v[,k]; res$u[,k] = - res$u[,k]}} scorex <- Xs %*% res$u scorez <- Zs %*% res$v op <- par(mfcol=c(3,2)) plot(res$u[,1], xlab="Substructure index", ylab="Weight", main="Component 1") plot(res$u[,2], xlab="Substructure index", ylab="Weight", main="Component 2") plot(res$u[,3], xlab="Substructure index", ylab="Weight", main="Component 3") plot(res$v[,1], xlab="Effect index", ylab="Weight", main="Component 1") plot(res$v[,2], xlab="Effect index", ylab="Weight", main="Component 2") plot(res$v[,3], xlab="Effect index", ylab="Weight", main="Component 3") par(op) # Store SCCA results write.table(round(colsumvec.x,3), file="./colsumvec.x.txt", quote=F, sep="\t", row.names=T, col.names=F) write.table(round(colsumvec.z,3), file="./colsumvec.z.txt", quote=F, sep="\t", row.names=T, col.names=F) write.table(round(res$u[,1:80],3), file="./result_scca_subst.txt", quote=F, sep="\t", row.names=T, col.names=F) write.table(round(res$v[,1:80],3), file="./result_scca_effect.txt", quote=F, sep="\t", row.names=T, col.names=F) write.table(round(scorex[,1:80],3), file="./result_scca_score_drugx.txt", quote=F, sep="\t", row.names=T, col.names=F) write.table(round(scorez[,1:80],3), file="./result_scca_score_drugz.txt", quote=F, sep="\t", row.names=T, col.names=F) write.table(round(res$cors[1:80],3), file="./cors.txt", quote=F, sep="\t", row.names=F, col.names=F)