################################################################ #This program file: # #Uses the output of "r-normalisation.txt" # #Computes t-statistics # #Regularized t-statistics # #SAM method # #Weighted Resampling (WR) method # #Computes FDR # #Computes the null-scores for mixture model approach # ################################################################ mx=array(0,c(nrg)) my=array(0,c(nrg)) s=array(0,c(nrg)) d=array(0,c(nrg)) zmx=array(0,c(nrg)) zmy=array(0,c(nrg)) zs=array(0,c(nrg)) zd=array(0,c(nrg)) sest=array(0,c(nrg)) sav=array(0,c(nrg)) w=array(0,c(nrg)) regd=array(0,c(nrg)) zregd=array(0,c(nrg)) dE=array(0,c(nrg)) sg=array(0,c(nrg,nr)) de=array(0,c(nrg,nr)) regde=array(0,c(nrg,nr)) tde=array(0,c(nrg,nr)) #approximating the variance from the "intensity vs variance curve" intensity=(xx+yy)/2 ord<-sort(intensity,index.return=TRUE) for (i in 1:nrg){ mx[i]=mean(x[i,]) my[i]=mean(y[i,]) s[i]=sqrt(sd(x[i,])^2/nc+sd(y[i,])^2/nc) } ordsd=s[ord$ix] #window size 50 ws=50 aa=1:nrg all=cbind(x,y) #computes the t-statistics and the regularized t-statistics for (i in 1:nrg){ d[i]=(mx[i]-my[i])/s[i] ii=aa[ord$ix==i] lw=max(0,ii-ws) uw=min(nrg,ii+ws) sav[i]=mean(ordsd[lw:uw]) regd[i]=(mx[i]-my[i])/sqrt((nc-1)/10*s[i]^2+(1-(nc-1)/10)*sav[i]^2) } ##############SAM method############## for (i in 1:nrg){ mx[i]=mean(x[i,]) my[i]=mean(y[i,]) s[i]=sqrt(sd(x[i,])^2/4+sd(y[i,])^2/4) } #computes s_0 const=100 q=array(0,c(const+1)) samd=array(0,c(nrg,const)) for (j in 1:const+1){ q[j]=quantile(s,(j-1)/const) } qq=array(0,c(20)) samd=array(0,c(nrg,20)) for (j in 1:21){ qq[j]=quantile(s,(j-1)/20) } samscore=array(0,c(nrg)) samd=array(0,c(nrg,20)) smd=array(0,c(nrg)) for (i in 1:nrg){ for (j in 1:20){ samd[i,j]=(mx[i]-my[i])/(s[i]+qq[j]) } } v=array(0,c(const)) cv=array(0,c(20)) for (k in 1:20){ for (j in 1:const){ v[j]=mad(samd[s>=q[j] & sdelt[i] & ssam< 0)]) tru=length(which(ssam< cutoff[i])) fls=length(which(dee< cutoff[i]))/50 FDRsam[i]=min(fls/tru,1) nrpicked[i]=length(which(smd[find]< cutoff[i])) } cbind(FDRsam,cutoff,nrpicked) library(stepfun) dz=ecdf(smd) rsmd=dz(smd) length(which(rsmd[find] < 0.02)) ###################WR METHOD############################ znr=nr1*nr2 zsg=array(0,c(nrg,znr)) zde=array(0,c(nrg,znr)) mgx=array(0,c(nrg,nr)) mgy=array(0,c(nrg,nr)) zmgx=array(0,c(nrg,znr)) zmgy=array(0,c(nrg,znr)) #resamples the original sample for (i in 1:nrg){ for (j in 1:nr1){ for (k in 1:nr2){ zmgx[i,k+(j-1)*nr1]=mean(x[i,-j]) zmgy[i,k+(j-1)*nr2]=mean(y[i,-k]) zsg[i,k+(j-1)*nr1]=sqrt(sd(x[i,-j])^2/znc+sd(y[i,-k])^2/znc) } } } #finds out the within subgroup variation relative to other resamplings o=array(0,c(nrg,znr)) q=array(0,c(nrg,znr)) ww=array(0,c(nrg,znr)) for (i in 1:nrg){ for (j in 1:znr){ ww[i,j]=(zsg[i,j]-mean(zsg[i,-j]))/mean(zsg[i,-j]) } ind=sort(ww[i,],index.return=TRUE) o[i,]=ind$ix q[i,]=ind$x } #defines the prior standard deviation s00=median(zsg) #computes the regularized t-statistics for each resampling for (i in 1:nrg){ for (j in 1:znr){ zde[i,j]=(zmgx[i,j]-zmgy[i,j])/(0.3*zsg[i,j]+0.7*s00) } } #assigns the weights we=array(0,c(znr)) for (i in 1:znr){ we[i]=length(which(o[,1]==i)) } kzde=array(0,c(nrg)) for (i in 1:nrg){ if (!is.na(q[i,1])) { if (q[i,1]< -0.5) kzde[i]=(sum(zde[i,])+8*zde[i,o[i,1]])/(znr+8) else kzde[i]=mean(zde[i,]) } } #determines the top z% of the genes according to each scoring library(stepfun) dz=ecdf(kzde) rzde=dz(kzde) dz=ecdf(regd) rregd=dz(regd) dz=ecdf(d) rrd=dz(d) dz=ecdf(smd) rsmd=dz(smd) frsam=array(0,c(con)) frrt=array(0,c(con)) frwr=array(0,c(con)) trh=1:con trh=trh/(10*con) for (i in 1:con){ frsam[i]=length(which(rsmd[find]< trh[i])) frrt[i]=length(which(rregd[find]< trh[i])) frwr[i]=length(which(rzde[find]< trh[i])) } #produces "ROC" curve plot(trh,frwr,type='l',col='red',xlim=c(0,0.1),lty=1,lwd=4,xlab="proportion of total genome picked",ylab="number of RD genes found among them") legend(0.06,40,c("WR","RT","SAM"),lty=c(1,2,3)) lines(trh,frrt,lty=2,lwd=4) lines(trh,frsam,lty=3,lwd=4) ################FDR############################### ns=100 regde=array(0,c(nrg,ns)) tde=array(0,c(nrg,ns)) wrdee=array(0,c(nrg,ns)) fdrsg=array(0,c(nrg,ns)) #mimics null-scores for (i in 1:nrg){ for (j in 1:ns){ sg1=sample(1:(2*nr1),nr1) sg2=setdiff(1:(2*nr1),sg1) for (k in 1:nr1){ for (l in 1:nr2){ s1=sg1[setdiff(1:nr1,k)] s2=sg2[setdiff(1:nr2,l)] wrdee[i,j]=wrdee[i,j]+(mean(all[i,s1])-mean(all[i,s2]))/sqrt(0.3*(sd(all[i,s1])^2/znc+sd(all[i,s2])^2/znc)+0.7*s00^2) } } wrdee[i,j]=wrdee[i,j]/(nr1*nr2) fdrsg[i,j]=sqrt(sd(all[i,sg1])^2/nc+sd(all[i,sg2])^2/nc) regde[i,j]=(mean(all[i,sg1])-mean(all[i,sg2]))/sqrt((nc-1)/10*fdrsg[i,j]^2+(1-(nc-1)/10)*sav[i]^2) tde[i,j]=(mean(all[i,sg1])-mean(all[i,sg2]))/fdrsg[i,j] } } const=70 FDRt=array(0,c(const)) FDRrt=array(0,c(const)) FDRwr=array(0,c(const)) a1=array(0,c(const)) a2=array(0,c(const)) b1=array(0,c(const)) b2=array(0,c(const)) c1=array(0,c(const)) c2=array(0,c(const)) kuszob1=1:const kuszob1=kuszob1/10 #fix Pi_0=0.9 for (i in 1:const){ a1[i]=length(which(regde < -kuszob1[i]))/(ns*nrg) a2[i]=length(which(regd < -kuszob1[i]))/nrg b1[i]=length(which(tde < -kuszob1[i]))/(ns*nrg) b2[i]=length(which(d < -kuszob1[i]))/nrg c1[i]=length(which(wrdee < -kuszob1[i]))/(ns*nrg) c2[i]=length(which(kzde < -kuszob1[i]))/nrg FDRt[i]=min(0.9*b1[i]/b2[i],1) FDRrt[i]=min(0.9*a1[i]/a2[i],1) FDRwr[i]=min(0.9*c1[i]/c2[i],1) } ######## #alternative Pi_0 kuszob2=1.5 a3=length(which(regd > -kuszob2 & regd<0))/nrg a4=length(which(regde > -kuszob2 & regde<0))/(ns*nrg) b3=length(which(d > -kuszob2 & d<0))/nrg b4=length(which(tde > -kuszob2 & tde<0))/(ns*nrg) c3=length(which(kzde > -kuszob2 & kzde<0))/nrg c4=length(which(wrdee > -kuszob2 & wrdee<0))/(ns*nrg) a3/a4 b3/b4 c3/c4 #plots threshold against FDR for each method plot(kuszob1,FDRt,xlim=c(0,7),ylim=c(0,1),type="l") lines(kuszob1,FDRrt,col='green') lines(kuszob1,FDRwr,col='red') FDRwr1=array(0,c(const)) FDRrt1=array(0,c(const)) FDRsam1=array(0,c(const)) for (i in 1:const){ FDRwr1[i]=length(which(kzde[find]< -kuszob1[i])) FDRrt1[i]=length(which(regd[find]< -kuszob1[i])) FDRsam1[i]=length(which(smd[find]< -kuszob1[i])) } #plots the ROC curve plot(FDRwr,FDRwr1/73,type='l',lwd=4,lty=1,col='red',xlim=c(0,1),ylim=c(0.3,1),xlab="FDR",ylab="True positive rate (sensitivity)") legend(0.5,0.6,c("WR","RT","SAM"),lty=c(1,2,3)) lines(FDRrt,FDRrt1/73,lwd=4,lty=2) lines(FDRsam,nrpicked/73,lwd=4,lty=3) ReducedGeneId[find[which(regd[find]< -3.6)]] ReducedGeneId[find[which(kzde[find]< -3.5)]] ReducedGeneId[find[which(smd[find]< -1.7)]] ########PAN############ #creating the null-scores pan0=array(0,c(nrg)) pan1=array(0,c(nrg)) for (i in 1:nrg){ temp1=sample(1:4,2) temp2=setdiff(5:8,2) sg1=union(temp1,temp2) sg2=setdiff(1:8,sg1) pan0[i]=(mean(all[i,sg1])-mean(all[i,sg2]))/sqrt(sd(all[i,sg1])^2/4+sd(all[i,sg2])^2/4) pan1[i]=(mean(x[i,])-mean(y[i,]))/sqrt(sd(x[i,])^2/4+sd(y[i,])^2/4) }