#####
#####  SE_scripts.txt (ver. 1.0 (100531))
#####  
#####  Supplementary codes for 
#####  "Predicting drug side-effects: a chemical fragment-based approach"
#####   by Pauwels, E., Stoven,V., and Yamanishi, Y. 
#####
#####   Copyright(c)2010 Edouard Pauwels. All rights reservied.  
#####
#####   LICENSING TERMS: 
#####   This program is free of charge for research and education purposes. 
#####   However you must obtain a license from the author to use it for commercial purposes. 
#####   The software must not be distributed without prior permission of the author.
#####   
#####   NO WARRANTY: 
#####   This software is supplied "as is" without warranty of any kind, 
#####   and its authors disclaim any and all warranties, 
#####   including but not limited to any implied warranties of merchantability and fitness for a particular purpose, 
#####   and any warranties or non infringement. 
#####   The user assumes all liability and responsibility for use of this software
#####   and in no event shall its authors be liable for damages of any kind resulting from its use.
#####


### Define functions ###

AUC_score <- function(scores,Zp){
#### Given a score profile and a binary positive/negative matrix/vector/row,
#### the function calculates the area under the ROC curve (AUC) for 20 thresholds.

	tpositive <- which(Zp==1)
	tnegative <- which(Zp==0)
	
	a <- scores	
	
	thrs <- sort(c(0,quantile(a[which(a!=0)],(0:20)/20)))
	score <- matrix(0,2,length(thrs))
	for (step in 1:length(thrs)){
		thresh <- thrs[step]
		ppositive <- which(scores>=thresh)
		score[1,step] <- length(intersect(ppositive,tpositive))/length(tpositive)
		score[2,step] <- length(intersect(ppositive,tnegative))/length(tnegative)
	}
	b <- 0
	for (l in 2:length(thrs)){
		print(b)
		b <- b+(score[1,l]+score[1,l-1])*abs(score[2,l]-score[2,l-1])/2
	}
	res <- list()
	res$b <- b
	res$score <- score
	res
}


Scores_component <- function(Xps,Zps,Zts,U,V,cors,d,mean,sd){
### Computes three different scores profiles given SCCA results(U,V,cors,d,mean,sd), training and test profile 
### matrix(Xps,Zts) and training data(mean, sd), Zps is not used ###

	ndrug <- dim(Xps)[1]
	nword <- dim(Zps)[2]
	nK <- dim(U)[2]

	a <- Xps %*% U 

	### Score 1 based on singular values
	score_1 <- a %*% diag(d) %*% t(V) %*% diag(sd) * matrix(sd, nrow(Xps), ncol(Zps), byrow=T) + matrix(mean, nrow(Xps), ncol(Zps), byrow=T)

	### Score 2 based on correlation values
	score_2 <- a %*% diag(cors) %*% t(V) * matrix(sd, nrow(Xps), ncol(Zps), byrow=T) + matrix(mean, nrow(Xps), ncol(Zps), byrow=T)
	
	b <- svd(V)
	
	b$d[b$d < 0.05] <- 0
	for (i in 1:length(b$d)){
		if (b$d[i]!=0){
			b$d[i] <- 1/b$d[i]
		}
	}
	
	### Score_3 based on pseudo inverse.
	score_3 <-   a %*% b$v %*% diag(b$d)  %*% t(b$u)  * matrix(sd, nrow(Xps), ncol(Zps), byrow=T) + matrix(mean, nrow(Xps), ncol(Zps), byrow=T)
	
	res <- list()
	res$score_1 <- score_1
	res$score_2 <- score_2
	res$score_3 <- score_3
	res
}

arrange_rows <- function(Z,Z_score_1,Z_score_2,Z_score_3){
### Sorts rows of score matrixes the same way as Z ones according to row names ###

	score_names <- rownames(Z_score_1)
	training_names <- rownames(Z)
	
	out_1 <- c()
	out_2 <- c()
	out_3 <- c()

	
	for (i in 1:length(training_names)){
		name <- training_names[i]
		place <- score_names==name
		out_1 <- rbind(out_1,Z_score_1[place,])
		out_2 <- rbind(out_2,Z_score_2[place,])
		out_3 <- rbind(out_3,Z_score_3[place,])
	}

	res <- list()
	res$s1 <- out_1
	res$s2 <- out_2
	res$s3 <- out_3
	res
}

arrange_rows_single <- function(Z,Z_score){
### Sorts rows of score matrix the same way as Z ones according to row names ###

	score_names <- rownames(Z_score)
	training_names <- rownames(Z)
	
	out_1 <- c()

	
	for (i in 1:length(training_names)){
		name <- training_names[i]
		place <- score_names==name
		out_1 <- rbind(out_1,Z_score[place,])
	}

	res <- out_1
	res
}

### Functions from PMA package ###

CheckVs <- function(v,x,z,K){ # If v is NULL, then get v as appropriate.
  if(!is.null(v) && !is.matrix(v)) v <- matrix(v,nrow=ncol(z))
  if(!is.null(v) && ncol(v)<K) v <- NULL
  if(!is.null(v) && ncol(v)>K) v <- matrix(v[,1:K],ncol=K)
  if(is.null(v) && ncol(z)>nrow(z) && ncol(x)>nrow(x)){
    v <- matrix(fastsvd(x,z)$v[,1:K],ncol=K)
  } else if (is.null(v) && (ncol(z)<=nrow(z) || ncol(x)<=nrow(x))){
    v <- matrix(svd(t(x)%*%z)$v[,1:K],ncol=K)
  }
  return(v)
}

fastsvd <- function(x,z){
  # fast svd of t(x)%*%z, where ncol(x)>>nrow(x) and same for z
  xx=x%*%t(x)
  xx2=msqrt(xx)
  y=t(z)%*%xx2
  a=svd(y)
  v=a$u
  d=a$d
  zz=z%*%t(z)
  zz2=msqrt(zz)
  y=t(x)%*%zz2
  a=svd(y)
  u=a$u
  return(list(u=u,v=v,d=d))
}

msqrt <- function(x){
  eigenx <- eigen(x)
  return(eigenx$vectors%*%diag(sqrt(pmax(0,eigenx$values)))%*%t(eigenx$vectors))
}


library(PMA)


### Load dataset ###

Z <- as.matrix(read.delim("./Z.txt"))
X <- as.matrix(read.delim("./X.txt"))
Xtest <- as.matrix(read.delim("Test_set_fingerprints.txt"))

### Standadridisation requires to delete null columns which values are all the same (no information)

colsumvec.x <- apply(X,2,sum)
X <- X[,colsumvec.x>0]
Xtest <- Xtest[,colsumvec.x>0]

colsumvec.z <- apply(Z,2,sum)
Z <- Z[,colsumvec.z>0]
colsumvec.z <- apply(Z,2,sum)
colsumvec.x <- apply(X,2,sum)


### Parameters to build cross validation sets ###
n <- nrow(X);  px <- ncol(X);  pz <- ncol(Z); nabs <- n
rsize <- trunc(n / 5)
index <- 1:n


# Create 5 training and test sets based on the entire dataset

for (r in 1:5){
	
	#Delete ids from the index list
	rindex <- sample(index, size=trunc(n/(6 - r)))    
	pindex.r <- rindex
	tindex.r <- (1:nabs)[-pindex.r]

	
	# Build training and test sets
	Xt <- X[tindex.r, ]
    Zt <- Z[tindex.r, ]
    Xp <- X[pindex.r, ]
    Zp <- Z[pindex.r, ]
	
	#Update index and number of ids left
	index <- index[!index%in%pindex.r]
	n <- length(index)

	# normatlization for X and Z based on training mean and sd
	xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean)
	xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd)
	xsd[xsd==0] <- 1; zsd[zsd==0] <- 1
	#Xt <- scale(Xt); Yt <- scale(Yt)
	
	Xts <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T)
	Zts <- Zt - matrix(zmean, nrow(Zt), ncol(Zt), byrow=T)
	Xps <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T)
	Zps <- Zp - matrix(zmean, nrow(Zp), ncol(Zp), byrow=T)


	Xts <- Xts * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T)
	Zts <- Zts * matrix(1/zsd, nrow(Zt), ncol(Zt), byrow=T)
	Xps <- Xps * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T)
	Zps <- Zps * matrix(1/zsd, nrow(Zp), ncol(Zp), byrow=T)
	
	
	# Save the results
	
	write.table(round(Xt,3), file=paste("./Data/Xt",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	write.table(round(Xp,3), file=paste("./Data/Xp",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	write.table(round(Zp,3), file=paste("./Data/Zp",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	write.table(round(Zt,3), file=paste("./Data/Zt",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	
	write.table(round(Xts,3), file=paste("./Data/Xts",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	write.table(round(Xps,3), file=paste("./Data/Xps",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	write.table(round(Zps,3), file=paste("./Data/Zps",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	write.table(round(Zts,3), file=paste("./Data/Zts",r,".txt",sep=""), quote=F, sep="\t", row.names=T, col.names=T)
	

}

### Compute cross-validation, we investigates the 3 proposed scores for various sparsity parameter values (m=80) ###

xxxcvmatcc<- matrix(0,nrow=3,ncol=19,dimnames=list(c('Score 1','Score 2','Score 3'),c('0.01','0.02','0.03','0.04','0.05','0.06','0.07','0.08','0.09','0.1','0.2','0.3','0.4','0.5','0.6','0.7','0.8','0.9','1')))
for (xxxi in 1:19){

	if (xxxi<10){
		xxxc <- 0.01 * xxxi
	}
	if (xxxi>9){
		xxxc <- 0.1 * (xxxi - 9)
	}
	
	prediction_1 <- c()
	prediction_2 <- c()
	prediction_3 <- c()


	for (xxxr in 1:5){# For each of the five training sets

		
		# Load the training and test set
		Xt <- as.matrix(read.delim(paste("./Data/Xt",xxxr,".txt",sep="")))
		Zt <- as.matrix(read.delim(paste("./Data/Zt",xxxr,".txt",sep="")))
		Xp <- as.matrix(read.delim(paste("./Data/Xp",xxxr,".txt",sep="")))
		Zp <- as.matrix(read.delim(paste("./Data/Zp",xxxr,".txt",sep="")))
		
		Xts <- as.matrix(read.delim(paste("./Data/Xts",xxxr,".txt",sep="")))
		Zts <- as.matrix(read.delim(paste("./Data/Zts",xxxr,".txt",sep="")))
		Xps <- as.matrix(read.delim(paste("./Data/Xps",xxxr,".txt",sep="")))
		Zps <- as.matrix(read.delim(paste("./Data/Zps",xxxr,".txt",sep="")))

		
		zfreq <- apply(Zt,2,sum)
		
		# Calculate training mean and sd
		xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean)
		xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd)
		xsd[xsd==0] <- 1; zsd[zsd==0] <- 1
		
		# Run SCCA
		result_s.sca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=xxxc, penaltyz=xxxc, 
		niter=15, K=80, trace=T, standardize=F)
		
		# Gather prediction scores profiles
		prediction_1 <- rbind(prediction_1,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_1,
		nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		prediction_2 <- rbind(prediction_2,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		prediction_3 <- rbind(prediction_3,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_3
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
	}
	
	# Re order score profiles
	prediction <- arrange_rows(Z,prediction_1,prediction_2,prediction_3,prediction_4,prediction_5)

	# Compute AUC
	xxxcvmatcc[1,xxxi] <- AUC_score(prediction$s1,Z)$b
	xxxcvmatcc[2,xxxi] <- AUC_score(prediction$s2,Z)$b
	xxxcvmatcc[3,xxxi] <- AUC_score(prediction$s3,Z)$b

	# Save the results
	write.table(round(xxxcvmatcc,4), file="./xxxcvmatcc_scores.txt", quote=F, sep="\t", row.names=T, col.names=T)
}

### Results ###
round(xxxcvmatcc,4)

#0.01	0.02	0.03	0.04	0.05	0.06	0.07	0.08	0.09	0.1	0.2	0.3	0.4	0.5	0.6	0.7	0.8	0.9	1
#Score 1	0.8884	0.8884	0.8805	0.8706	0.8379	0.7966	0.7406	0.6363	0.5755	0.5569	0.604	0.6148	0.6162	0.6188	0.6195	0.6194	0.6194	0.6194	0.6194
#Score 2	0.8938	0.8938	0.8939	0.894	0.8939	0.8936	0.8922	0.8905	0.8881	0.8836	0.8037	0.8092	0.8404	0.8542	0.857	0.8584	0.8563	0.8563	0.8563
#Score 3	0.8917	0.8917	0.8914	0.8912	0.8869	0.8855	0.8805	0.8673	0.8569	0.8432	0.7229	0.7213	0.7266	0.7465	0.7654	0.796	0.799	0.799	0.799


### Compute cross-validation, we select score 2. We investigate different values for c(sparsity) and d(number of components) parameters ###

xxxcvmatcc<- matrix(0,nrow=10,ncol=10,dimnames=list(c('10','20','30','40','50','60','70','80','90','100'),c('0.01','0.02','0.03','0.04','0.05','0.06','0.07','0.08','0.09','0.1')))

for (xxxi in 1:10){
	

	if (xxxi<10){
		xxxc <- 0.01 * xxxi
	}
	if (xxxi>9){
		xxxc <- 0.1 * (xxxi - 9)
	}
	
	# Initialise prediction scores profiles
	prediction_2_10 <- c()
	prediction_2_20 <- c()
	prediction_2_30 <- c()
	prediction_2_40 <- c()
	prediction_2_50 <- c()
	prediction_2_60 <- c()
	prediction_2_70 <- c()
	prediction_2_80 <- c()
	prediction_2_90 <- c()
	prediction_2_100 <- c()


	for (xxxr in 1:5){# For each of the five training sets
		

		# Load the training and test set
		Xt <- as.matrix(read.delim(paste("./Data/Xt",xxxr,".txt",sep="")))
		Zt <- as.matrix(read.delim(paste("./Data/Zt",xxxr,".txt",sep="")))
		Xp <- as.matrix(read.delim(paste("./Data/Xp",xxxr,".txt",sep="")))
		Zp <- as.matrix(read.delim(paste("./Data/Zp",xxxr,".txt",sep="")))
		
		Xts <- as.matrix(read.delim(paste("./Data/Xts",xxxr,".txt",sep="")))
		Zts <- as.matrix(read.delim(paste("./Data/Zts",xxxr,".txt",sep="")))
		Xps <- as.matrix(read.delim(paste("./Data/Xps",xxxr,".txt",sep="")))
		Zps <- as.matrix(read.delim(paste("./Data/Zps",xxxr,".txt",sep="")))

		
		zfreq <- apply(Zt,2,sum)
		
		# Calculate training mean and sd
		xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean)
		xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd)
		xsd[xsd==0] <- 1; zsd[zsd==0] <- 1
		
		# Run SCCA
		result_s.sca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=xxxc, penaltyz=xxxc, 
		niter=15, K=100, trace=T, standardize=F)
		
		# Gather prediction scores profiles
		prediction_2_10 <- rbind(prediction_2_10,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:10],result_s.sca$v[,1:10]
		,result_s.sca$cors[1:10],result_s.sca$d[1:10],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_20 <- rbind(prediction_2_20,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:20],result_s.sca$v[,1:20]
		,result_s.sca$cors[1:20],result_s.sca$d[1:20],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_30 <- rbind(prediction_2_30,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:30],result_s.sca$v[,1:30]
		,result_s.sca$cors[1:30],result_s.sca$d[1:30],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_40 <- rbind(prediction_2_40,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:40],result_s.sca$v[,1:40]
		,result_s.sca$cors[1:40],result_s.sca$d[1:40],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_50 <- rbind(prediction_2_50,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:50],result_s.sca$v[,1:50]
		,result_s.sca$cors[1:50],result_s.sca$d[1:50],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_60 <- rbind(prediction_2_60,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:60],result_s.sca$v[,1:60]
		,result_s.sca$cors[1:60],result_s.sca$d[1:60],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_70 <- rbind(prediction_2_70,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:70],result_s.sca$v[,1:70]
		,result_s.sca$cors[1:70],result_s.sca$d[1:70],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_80 <- rbind(prediction_2_80,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:80],result_s.sca$v[,1:80]
		,result_s.sca$cors[1:80],result_s.sca$d[1:80],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_90 <- rbind(prediction_2_90,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:90],result_s.sca$v[,1:90]
		,result_s.sca$cors[1:90],result_s.sca$d[1:90],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
		prediction_2_100 <- rbind(prediction_2_100,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u[,1:100],result_s.sca$v[,1:100]
		,result_s.sca$cors[1:100],result_s.sca$d[1:100],zmean,zsd)$score_2
		,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
		
	}
	
	# Re order score profiles
	prediction_10 <- arrange_rows_single(Z,prediction_2_10)
	prediction_20 <- arrange_rows_single(Z,prediction_2_20)
	prediction_30 <- arrange_rows_single(Z,prediction_2_30)	
	prediction_40 <- arrange_rows_single(Z,prediction_2_40)
	prediction_50 <- arrange_rows_single(Z,prediction_2_50)
	prediction_60 <- arrange_rows_single(Z,prediction_2_60)
	prediction_70 <- arrange_rows_single(Z,prediction_2_70)
	prediction_80 <- arrange_rows_single(Z,prediction_2_80)
	prediction_90 <- arrange_rows_single(Z,prediction_2_90)
	prediction_100 <- arrange_rows_single(Z,prediction_2_100)
	
	# Compute AUC
	xxxcvmatcc[1,xxxi] <- AUC_score(prediction_10,Z)$b
	xxxcvmatcc[2,xxxi] <- AUC_score(prediction_20,Z)$b
	xxxcvmatcc[3,xxxi] <- AUC_score(prediction_30,Z)$b
	xxxcvmatcc[4,xxxi] <- AUC_score(prediction_40,Z)$b
	xxxcvmatcc[5,xxxi] <- AUC_score(prediction_50,Z)$b
	xxxcvmatcc[6,xxxi] <- AUC_score(prediction_60,Z)$b
	xxxcvmatcc[7,xxxi] <- AUC_score(prediction_70,Z)$b
	xxxcvmatcc[8,xxxi] <- AUC_score(prediction_80,Z)$b
	xxxcvmatcc[9,xxxi] <- AUC_score(prediction_90,Z)$b
	xxxcvmatcc[10,xxxi] <- AUC_score(prediction_100,Z)$b
	
	# Save the results
	write.table(round(xxxcvmatcc,4), file="./xxxcvmatcc_scores_d_c.txt", quote=F, sep="\t", row.names=T, col.names=T)
}


round(xxxcvmatcc,4)
#0.01	0.02	0.03	0.04	0.05	0.06	0.07	0.08	0.09	0.1
#10	0.894	0.894	0.8941	0.8942	0.8945	0.8946	0.8943	0.8945	0.8945	0.8938
#20	0.8939	0.8939	0.894	0.8942	0.8943	0.8945	0.8943	0.8943	0.8944	0.8936
#30	0.8939	0.8939	0.894	0.8942	0.8943	0.8943	0.894	0.894	0.894	0.8928
#40	0.8939	0.8939	0.894	0.8942	0.8941	0.8941	0.8939	0.8936	0.8935	0.8909
#50	0.8939	0.8939	0.894	0.8941	0.8941	0.894	0.8938	0.8933	0.8929	0.8897
#60	0.8939	0.8939	0.894	0.8942	0.894	0.8939	0.8934	0.893	0.8921	0.8886
#70	0.8938	0.8938	0.894	0.8942	0.8941	0.8938	0.8932	0.8918	0.8901	0.8862
#80	0.8938	0.8938	0.8939	0.894	0.8939	0.8936	0.8926	0.8903	0.888	0.8832
#90	0.8938	0.8938	0.8939	0.894	0.8938	0.8935	0.8921	0.8895	0.8858	0.8785
#100	0.8938	0.8938	0.8939	0.894	0.8938	0.8932	0.8918	0.8888	0.8843	0.8748


#### plot ROC curves for SCCA and OCCA

cv_prediction_s <- c()
cv_prediction_o <- c()
for (xxxr in 1:5){# For each of the five training sets
	
	# Initialise all variables	
	
	# Load the training and test set
    Xt <- as.matrix(read.delim(paste("./Data/Xt",xxxr,".txt",sep="")))
	Zt <- as.matrix(read.delim(paste("./Data/Zt",xxxr,".txt",sep="")))
    Xp <- as.matrix(read.delim(paste("./Data/Xp",xxxr,".txt",sep="")))
	Zp <- as.matrix(read.delim(paste("./Data/Zp",xxxr,".txt",sep="")))	
	zfreq <- apply(Zt,2,sum)
	
	# normatlization for X and Y based on training mean and sd
    xmean <- apply(Xt,2,mean); zmean <- apply(Zt,2,mean)
    xsd <- apply(Xt,2,sd); zsd <- apply(Zt,2,sd)
    xsd[xsd==0] <- 1; zsd[zsd==0] <- 1
    #Xt <- scale(Xt); Yt <- scale(Yt)
	
	Xts <- Xt - matrix(xmean, nrow(Xt), ncol(Xt), byrow=T)
	Zts <- Zt - matrix(zmean, nrow(Zt), ncol(Zt), byrow=T)
	Xps <- Xp - matrix(xmean, nrow(Xp), ncol(Xp), byrow=T)
	Zps <- Zp - matrix(zmean, nrow(Zp), ncol(Zp), byrow=T)


	Xts <- Xts * matrix(1/xsd, nrow(Xt), ncol(Xt), byrow=T)
	Zts <- Zts * matrix(1/zsd, nrow(Zt), ncol(Zt), byrow=T)
	Xps <- Xps * matrix(1/xsd, nrow(Xp), ncol(Xp), byrow=T)
	Zps <- Zps * matrix(1/zsd, nrow(Zp), ncol(Zp), byrow=T)

	
	result_s.sca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=0.04, penaltyz=0.04, 
	niter=15, K=80, trace=T, standardize=F)
	result_s.oca <- CCA(x=Xts, z=Zts, typex="standard", typez="standard", penaltyx=1, penaltyz=1, 
	niter=15, K=80, trace=T, standardize=F)
	
	cv_prediction_s <- rbind(cv_prediction_s,matrix(Scores_component(Xps,Zps,Zts,result_s.sca$u,result_s.sca$v,result_s.sca$cors,result_s.sca$d,zmean,zsd)$score_2
	,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
	
	
	cv_prediction_o <- rbind(cv_prediction_o,matrix(Scores_component(Xps,Zps,Zts,result_s.oca$u,result_s.oca$v,result_s.oca$cors,result_s.oca$d,zmean,zsd)$score_2
	,nrow=dim(Zp)[1],ncol=dim(Zp)[2],dimnames=list(rownames(Zp),colnames(Zp))))
}

cv_prediction_s <- arrange_rows_single(Z,cv_prediction_s)
cv_prediction_o <- arrange_rows_single(Z,cv_prediction_o)

ROC_s <- AUC_score(cv_prediction_s,Z)
ROC_o <- AUC_score(cv_prediction_o,Z)

plot(ROC_s$score[2,], ROC_s$score[1,], pch=1,xlim=c(0,1),ylim=c(0,1),xlab="", ylab="", main=""); par(new=T)
plot(ROC_o$score[2,], ROC_o$score[1,], pch=5,xlim=c(0,1),ylim=c(0,1),xlab="", ylab="", main=""); par(new=T)
plot(c(0,1), c(0,1), type="n", xlab="False positive rate", ylab="True
positive rate", main="ROC curves")
legend(0.7, 0.6, "SCCA", pch=1)
legend(0.7, 0.45, "OCCA", pch=5)


### Run SCCA ###


# normatlization for X and Z
xmean <- apply(X,2,mean); zmean <- apply(Z,2,mean)
xsd <- apply(X,2,sd); zsd <- apply(Z,2,sd)
xsd[xsd==0] <- 1; zsd[zsd==0] <- 1
#Xt <- scale(Xt); Yt <- scale(Yt)

Xs <- X - matrix(xmean, nrow(X), ncol(X), byrow=T)
Zs <- Z - matrix(zmean, nrow(Z), ncol(Z), byrow=T)

Xs <- Xs * matrix(1/xsd, nrow(X), ncol(X), byrow=T)
Zs <- Zs * matrix(1/zsd, nrow(Z), ncol(Z), byrow=T)


res <- CCA(Xs,Zs,typex="standard", typez="standard", penaltyx=0.04, penaltyz=0.04, K=80, niter=30, standardize= FALSE, trace=TRUE)
# Arange sign
for (k in 1:80){if (max(abs(res$u[,k])) != max(res$u[,k])){res$v[,k] = - res$v[,k]; res$u[,k] = - res$u[,k]}}

scorex <- Xs %*% res$u
scorez <- Zs %*% res$v

op <- par(mfcol=c(3,2))
plot(res$u[,1], xlab="Substructure index", ylab="Weight", main="Component 1")
plot(res$u[,2], xlab="Substructure index", ylab="Weight", main="Component 2")
plot(res$u[,3], xlab="Substructure index", ylab="Weight", main="Component 3")
plot(res$v[,1], xlab="Effect index", ylab="Weight", main="Component 1")
plot(res$v[,2], xlab="Effect index", ylab="Weight", main="Component 2")
plot(res$v[,3], xlab="Effect index", ylab="Weight", main="Component 3")
par(op)


# Store SCCA results
write.table(round(colsumvec.x,3), file="./colsumvec.x.txt", quote=F, sep="\t", row.names=T, col.names=F)
write.table(round(colsumvec.z,3), file="./colsumvec.z.txt", quote=F, sep="\t", row.names=T, col.names=F)
write.table(round(res$u[,1:80],3), file="./result_scca_subst.txt", quote=F, sep="\t", row.names=T, col.names=F)
write.table(round(res$v[,1:80],3), file="./result_scca_effect.txt", quote=F, sep="\t", row.names=T, col.names=F)
write.table(round(scorex[,1:80],3), file="./result_scca_score_drugx.txt", quote=F, sep="\t", row.names=T, col.names=F)
write.table(round(scorez[,1:80],3), file="./result_scca_score_drugz.txt", quote=F, sep="\t", row.names=T, col.names=F)
write.table(round(res$cors[1:80],3), file="./cors.txt", quote=F, sep="\t", row.names=F, col.names=F)