################################################################
#This program file:                                            #
#Uses the output of "r-normalisation.txt"                      #
#Computes t-statistics                                         #
#Regularized t-statistics                                      #
#SAM method                                                    #
#Weighted Resampling (WR) method                               #
#Computes FDR                                                  #
#Computes the null-scores for mixture model approach           #  
################################################################

mx=array(0,c(nrg))
my=array(0,c(nrg))
s=array(0,c(nrg))
d=array(0,c(nrg))
zmx=array(0,c(nrg))
zmy=array(0,c(nrg))
zs=array(0,c(nrg))
zd=array(0,c(nrg))
sest=array(0,c(nrg))
sav=array(0,c(nrg))
w=array(0,c(nrg))
regd=array(0,c(nrg))
zregd=array(0,c(nrg))
dE=array(0,c(nrg))

sg=array(0,c(nrg,nr))
de=array(0,c(nrg,nr))
regde=array(0,c(nrg,nr))
tde=array(0,c(nrg,nr))

#approximating the variance from the "intensity vs variance curve" 

intensity=(xx+yy)/2
ord<-sort(intensity,index.return=TRUE)
for (i in 1:nrg){
mx[i]=mean(x[i,])
my[i]=mean(y[i,])
s[i]=sqrt(sd(x[i,])^2/nc+sd(y[i,])^2/nc)
}
ordsd=s[ord$ix]

#window size 50

ws=50
aa=1:nrg
all=cbind(x,y)

#computes the t-statistics and the regularized t-statistics

for (i in 1:nrg){
d[i]=(mx[i]-my[i])/s[i]
ii=aa[ord$ix==i]
lw=max(0,ii-ws)
uw=min(nrg,ii+ws)
sav[i]=mean(ordsd[lw:uw])
regd[i]=(mx[i]-my[i])/sqrt((nc-1)/10*s[i]^2+(1-(nc-1)/10)*sav[i]^2)
}

##############SAM method##############

for (i in 1:nrg){
mx[i]=mean(x[i,])
my[i]=mean(y[i,])
s[i]=sqrt(sd(x[i,])^2/4+sd(y[i,])^2/4)
}

#computes s_0

const=100
q=array(0,c(const+1))
samd=array(0,c(nrg,const))
for (j in 1:const+1){
q[j]=quantile(s,(j-1)/const)
}

qq=array(0,c(20))
samd=array(0,c(nrg,20))
for (j in 1:21){
qq[j]=quantile(s,(j-1)/20)
}

samscore=array(0,c(nrg))
samd=array(0,c(nrg,20))
smd=array(0,c(nrg))

for (i in 1:nrg){
for (j in 1:20){
samd[i,j]=(mx[i]-my[i])/(s[i]+qq[j])
}
}

v=array(0,c(const))
cv=array(0,c(20))
for (k in 1:20){
for (j in 1:const){
v[j]=mad(samd[s>=q[j] & s<q[j+1],k])
}
cv[k]=sd(v)/mean(v)
}

min=Inf
minix=1
for (k in 1:20){
if (cv[k]<min){
minix=k
min=cv[k]
}
}

s0=qq[minix]

for (i in 1:nrg){smd[i]=samd[i,minix]}

#mimics null-scores

ns=50
dee=array(0,c(nrg,ns))
sdee=array(0,c(nrg,ns))
sde=array(0,c(nrg))
de=array(0,c(nrg))
for (i in 1:nrg){
for (j in 1:ns){
temp1=sample(1:4,2)
temp2=sample(5:8,2)
sg1=union(temp1,temp2)
sg2=setdiff(1:8,sg1)
dee[i,j]=(mean(all[i,sg1])-mean(all[i,sg2]))/(sqrt(sd(all[i,sg1])^2/4+sd(all[i,sg2])^2/4)+s0)
}
de[i]=mean(dee[i,])
}


for (j in 1:50){sdee[,j]=sort(dee[,j])}
for (i in 1:nrg){sde[i]=mean(sdee[i,])}

sssam=sort(smd,index.return=TRUE)
ssam=sssam$x
sorr=sssam$ix
plot(sde,ssam)

con=70
FDRsam=array(0,c(con))
nrpicked=array(0,c(con))
kul=abs(ssam-sde)

#computes the FDR for SAM

cutoff=array(0,c(con))
delt=1:con
delt=delt/con
for (i in 1:con){
cutoff[i]=max(ssam[which(abs(ssam-sde)>delt[i] & ssam< 0)])
tru=length(which(ssam< cutoff[i]))
fls=length(which(dee< cutoff[i]))/50
FDRsam[i]=min(fls/tru,1)
nrpicked[i]=length(which(smd[find]< cutoff[i]))
}

cbind(FDRsam,cutoff,nrpicked)

library(stepfun)
dz=ecdf(smd)
rsmd=dz(smd)

length(which(rsmd[find] < 0.02))


###################WR METHOD############################

znr=nr1*nr2
zsg=array(0,c(nrg,znr))
zde=array(0,c(nrg,znr))
mgx=array(0,c(nrg,nr))
mgy=array(0,c(nrg,nr))
zmgx=array(0,c(nrg,znr))
zmgy=array(0,c(nrg,znr))

#resamples the original sample

for (i in 1:nrg){
for (j in 1:nr1){
for (k in 1:nr2){
zmgx[i,k+(j-1)*nr1]=mean(x[i,-j]) 
zmgy[i,k+(j-1)*nr2]=mean(y[i,-k])
zsg[i,k+(j-1)*nr1]=sqrt(sd(x[i,-j])^2/znc+sd(y[i,-k])^2/znc)
}
}
}

#finds out the within subgroup variation relative to other resamplings

o=array(0,c(nrg,znr))
q=array(0,c(nrg,znr))
ww=array(0,c(nrg,znr))
for (i in 1:nrg){
for (j in 1:znr){
ww[i,j]=(zsg[i,j]-mean(zsg[i,-j]))/mean(zsg[i,-j])
}
ind=sort(ww[i,],index.return=TRUE)
o[i,]=ind$ix
q[i,]=ind$x
}

#defines the prior standard deviation

s00=median(zsg)

#computes the regularized t-statistics for each resampling

for (i in 1:nrg){
for (j in 1:znr){
zde[i,j]=(zmgx[i,j]-zmgy[i,j])/(0.3*zsg[i,j]+0.7*s00)
}
}

#assigns the weights

we=array(0,c(znr))
for (i in 1:znr){
we[i]=length(which(o[,1]==i))
}

kzde=array(0,c(nrg))

for (i in 1:nrg){
if (!is.na(q[i,1])) {
if (q[i,1]< -0.5) kzde[i]=(sum(zde[i,])+8*zde[i,o[i,1]])/(znr+8)
else kzde[i]=mean(zde[i,])
}
}

#determines the top z% of the genes according to each scoring

library(stepfun)
dz=ecdf(kzde)
rzde=dz(kzde)

dz=ecdf(regd)
rregd=dz(regd)

dz=ecdf(d)
rrd=dz(d)

dz=ecdf(smd)
rsmd=dz(smd)

frsam=array(0,c(con))
frrt=array(0,c(con))
frwr=array(0,c(con))


trh=1:con
trh=trh/(10*con)
for (i in 1:con){
frsam[i]=length(which(rsmd[find]< trh[i]))
frrt[i]=length(which(rregd[find]< trh[i]))
frwr[i]=length(which(rzde[find]< trh[i]))
}

#produces "ROC" curve

plot(trh,frwr,type='l',col='red',xlim=c(0,0.1),lty=1,lwd=4,xlab="proportion of total genome picked",ylab="number of RD genes found among them")
legend(0.06,40,c("WR","RT","SAM"),lty=c(1,2,3))
lines(trh,frrt,lty=2,lwd=4)
lines(trh,frsam,lty=3,lwd=4)

################FDR###############################

ns=100
regde=array(0,c(nrg,ns))
tde=array(0,c(nrg,ns))
wrdee=array(0,c(nrg,ns))
fdrsg=array(0,c(nrg,ns))

#mimics null-scores 

for (i in 1:nrg){
for (j in 1:ns){
sg1=sample(1:(2*nr1),nr1)
sg2=setdiff(1:(2*nr1),sg1)
for (k in 1:nr1){
for (l in 1:nr2){
s1=sg1[setdiff(1:nr1,k)]
s2=sg2[setdiff(1:nr2,l)]
wrdee[i,j]=wrdee[i,j]+(mean(all[i,s1])-mean(all[i,s2]))/sqrt(0.3*(sd(all[i,s1])^2/znc+sd(all[i,s2])^2/znc)+0.7*s00^2)
}
}
wrdee[i,j]=wrdee[i,j]/(nr1*nr2)
fdrsg[i,j]=sqrt(sd(all[i,sg1])^2/nc+sd(all[i,sg2])^2/nc)
regde[i,j]=(mean(all[i,sg1])-mean(all[i,sg2]))/sqrt((nc-1)/10*fdrsg[i,j]^2+(1-(nc-1)/10)*sav[i]^2)
tde[i,j]=(mean(all[i,sg1])-mean(all[i,sg2]))/fdrsg[i,j]
}
}

const=70
FDRt=array(0,c(const))
FDRrt=array(0,c(const))
FDRwr=array(0,c(const))
a1=array(0,c(const))
a2=array(0,c(const))
b1=array(0,c(const))
b2=array(0,c(const))
c1=array(0,c(const))
c2=array(0,c(const))
kuszob1=1:const
kuszob1=kuszob1/10

#fix Pi_0=0.9

for (i in 1:const){
a1[i]=length(which(regde < -kuszob1[i]))/(ns*nrg)
a2[i]=length(which(regd < -kuszob1[i]))/nrg
b1[i]=length(which(tde < -kuszob1[i]))/(ns*nrg)
b2[i]=length(which(d < -kuszob1[i]))/nrg
c1[i]=length(which(wrdee < -kuszob1[i]))/(ns*nrg)
c2[i]=length(which(kzde < -kuszob1[i]))/nrg
FDRt[i]=min(0.9*b1[i]/b2[i],1)
FDRrt[i]=min(0.9*a1[i]/a2[i],1)
FDRwr[i]=min(0.9*c1[i]/c2[i],1)
}

########


#alternative Pi_0

kuszob2=1.5
a3=length(which(regd > -kuszob2 & regd<0))/nrg
a4=length(which(regde > -kuszob2 & regde<0))/(ns*nrg)
b3=length(which(d > -kuszob2 & d<0))/nrg
b4=length(which(tde > -kuszob2 & tde<0))/(ns*nrg)
c3=length(which(kzde > -kuszob2 & kzde<0))/nrg
c4=length(which(wrdee > -kuszob2 & wrdee<0))/(ns*nrg)
a3/a4
b3/b4
c3/c4

#plots threshold against FDR for each method

plot(kuszob1,FDRt,xlim=c(0,7),ylim=c(0,1),type="l")
lines(kuszob1,FDRrt,col='green')
lines(kuszob1,FDRwr,col='red')

FDRwr1=array(0,c(const))
FDRrt1=array(0,c(const))
FDRsam1=array(0,c(const))

for (i in 1:const){
FDRwr1[i]=length(which(kzde[find]< -kuszob1[i]))
FDRrt1[i]=length(which(regd[find]< -kuszob1[i]))
FDRsam1[i]=length(which(smd[find]< -kuszob1[i]))
}

#plots the ROC curve

plot(FDRwr,FDRwr1/73,type='l',lwd=4,lty=1,col='red',xlim=c(0,1),ylim=c(0.3,1),xlab="FDR",ylab="True positive rate (sensitivity)")
legend(0.5,0.6,c("WR","RT","SAM"),lty=c(1,2,3))
lines(FDRrt,FDRrt1/73,lwd=4,lty=2)
lines(FDRsam,nrpicked/73,lwd=4,lty=3)


ReducedGeneId[find[which(regd[find]< -3.6)]]
ReducedGeneId[find[which(kzde[find]< -3.5)]]
ReducedGeneId[find[which(smd[find]< -1.7)]]

########PAN############

#creating the null-scores

pan0=array(0,c(nrg))
pan1=array(0,c(nrg))
for (i in 1:nrg){
temp1=sample(1:4,2)
temp2=setdiff(5:8,2)
sg1=union(temp1,temp2)
sg2=setdiff(1:8,sg1)
pan0[i]=(mean(all[i,sg1])-mean(all[i,sg2]))/sqrt(sd(all[i,sg1])^2/4+sd(all[i,sg2])^2/4)
pan1[i]=(mean(x[i,])-mean(y[i,]))/sqrt(sd(x[i,])^2/4+sd(y[i,])^2/4)
}