################################################################ #This program file: # #Reads in the raw data from txt file # #Performs background subtraction # #Uses Lowess normalization # #Across replicate normalization # #Replicate correlation checking # #Scatterplots channel normalized intensities against each other# ################################################################ #######READING IN RAW DATA######## #number of replicates in each group nr1=4 nr2=4 X31= read.delim("C:\\Zoli\\R\\Type12\\1_test_cy3.txt", header = TRUE, sep = "\t", quote="\"", dec=".") X32= read.delim("C:\\Zoli\\R\\Type12\\2_test_cy3.txt", header = TRUE, sep = "\t", quote="\"", dec=".") X33= read.delim("C:\\Zoli\\R\\Type12\\3_test_cy3.txt", header = TRUE, sep = "\t", quote="\"", dec=".") X34= read.delim("C:\\Zoli\\R\\Type12\\4_test_cy3.txt", header = TRUE, sep = "\t", quote="\"", dec=".") #X35= read.delim("C:\\Zoli\\R\\Type35\\5_test_cy3.txt", header = TRUE, sep = "\t", quote="\"", dec=".") #X36= read.delim("C:\\Zoli\\R\\Type35\\6_test_cy3.txt", header = TRUE, sep = "\t", quote="\"", dec=".") #X37= read.delim("C:\\Zoli\\R\\Type20\\7_test_cy3.txt", header = TRUE, sep = "\t", quote="\"", dec=".") X51= read.delim("C:\\Zoli\\R\\Type12\\1_test_cy5.txt", header = TRUE, sep = "\t", quote="\"", dec=".") X52= read.delim("C:\\Zoli\\R\\Type12\\2_test_cy5.txt", header = TRUE, sep = "\t", quote="\"", dec=".") X53= read.delim("C:\\Zoli\\R\\Type12\\3_test_cy5.txt", header = TRUE, sep = "\t", quote="\"", dec=".") X54= read.delim("C:\\Zoli\\R\\Type12\\4_test_cy5.txt", header = TRUE, sep = "\t", quote="\"", dec=".") #X55= read.delim("C:\\Zoli\\R\\Type35\\5_test_cy5.txt", header = TRUE, sep = "\t", quote="\"", dec=".") #X56= read.delim("C:\\Zoli\\R\\Type35\\6_test_cy5.txt", header = TRUE, sep = "\t", quote="\"", dec=".") #X57= read.delim("C:\\Zoli\\R\\Type20\\7_test_cy5.txt", header = TRUE, sep = "\t", quote="\"", dec=".") ng=dim(X31)[1] GeneId=X31[,7] a1=array(0,c(ng,nr1)) b1=array(0,c(ng,nr2)) a2=array(0,c(ng,nr1)) b2=array(0,c(ng,nr2)) a1=cbind(X31[,11],X32[,11],X33[,11],X34[,11])#,X35[,11],X36[,11])#,X37[,11]) b1=cbind(X51[,11],X52[,11],X53[,11],X54[,11])#,X55[,11],X56[,11])#,X57[,11]) a2=cbind(X31[,12],X32[,12],X33[,12],X34[,12])#,X35[,12],X36[,12])#,X37[,12]) b2=cbind(X51[,12],X52[,12],X53[,12],X54[,12])#,X55[,12],X56[,12])#,X57[,12]) #background subtraction x=array(a1-a2,dim=c(ng,nr1)) y=array(b1-b2,dim=c(ng,nr2)) ######NORMALIZATION########## nr1=dim(x)[2] nr2=dim(y)[2] lost=c() for (r in 1:nr1){ for (i in 1:ng){ if (x[i,r]<=0){ x[i,r]=NA lost=union(lost,i) } } } for (r in 1:nr2){ for (i in 1:ng){ if (y[i,r]<=0){ y[i,r]=NA lost=union(lost,i) } } } #excluding control spots contr=c() temp1=array(1,c(43)) temp2=c(1:17,111,128,143:147,162,179,273:289) temp2=array(temp2,dim=c(43)) for (i in 1:16){ contr=union(contr,temp2+(i-1)*289*temp1) } contr=array(contr,dim=c(688)) x[contr,]=NA; y[contr,]=NA; excl=c() excl=union(contr,lost) x=x[-excl,] y=y[-excl,] nrg=length(x[,1]) x=array(x,c(nrg,nr1)) y=array(y,c(nrg,nr1)) A=array(0,c(nrg,nr1)) M=array(0,c(nrg,nr2)) A=log(x*y) M=log(x/y) ##############LOWESS########### for (i in 1:nr1){ a<-lowess(A[,i],M[,i],f=1/3) yy=sort(A[,i],index.return=TRUE) for (j in 1:nrg){ M[yy$ix[j],i]=M[yy$ix[j],i]-a$y[j] } } for (j in 1:nr1){ for (i in 1:nrg){ x[i,j]=(A[i,j]+M[i,j])/2 y[i,j]=(A[i,j]-M[i,j])/2 } } #########across replicates########### med1=array(0,c(nr1)) med2=array(0,c(nr2)) for (r in 1:nr1){ med1[r]=mean(x[,r],na.rm=TRUE) med2[r]=mean(y[,r],na.rm=TRUE) } for (i in 1:nrg){ for (j in 1:nr1){ x[i,j]=x[i,j]/med1[j] y[i,j]=y[i,j]/med2[j] }} le=length(excl) excl=array(excl,dim=c(le)) nrg=dim(x)[1] ReducedGeneId=array(0,c(nrg)) ReducedGeneId=t(GeneId)[-excl] LostGeneId=t(GeneId)[excl] nl=length(LostGeneId) LostGeneId=array(LostGeneId,dim=c(nl)) ReducedGeneId=array(ReducedGeneId,dim=c(nrg)) xx=array(0,c(nrg)) yy=array(0,c(nrg)) for (i in 1:nrg){ xx[i]=mean(x[i,]) yy[i]=mean(y[i,]) } ####overall replicate reliability####### rel1=array(0,c(nr1)) rel2=array(0,c(nr2)) for (j in 1:nr1){ rel1[j]=cor(xx,x[,j],use = "pairwise.complete.obs") } for (j in 1:nr2){ rel2[j]=cor(yy,y[,j],use = "pairwise.complete.obs") } rel1 rel2 #########reading in Gene IDs from file######## namesTB= read.delim("C:\\Zoli\\R\\GenomeTBVLA.txt", header = TRUE, sep = "\t", quote="\"", dec=".") ngns=dim(namesTB)[1] ID=array(namesTB[,1],c(ngns)) key=array(namesTB[,2],c(ngns)) genome=array(0,c(ngns)) genome=pmatch(ID,ReducedGeneId) genome=array(genome,c(length(genome))) ##RD Regions RD1=3871:3878 RD2=1978:1987 RD3=1572:1587 RD4=1506:1516 RD5=2347:2352 RD6=3423:3428 RD7=1964:1977 RD8=3616:3623 RD9=2072:2074 RD10=21 RD11=2645:2660 RD12=3117:3121 RD13=1255:1257 RD14=1764:1773 RD17=1563 RD=c(RD17,RD7,RD10,RD13,RD4,RD11,RD6,RD8,RD12,RD5,RD9) find=genome[RD] #plotting average log channel intensities against each other plot(xx,yy,pch=19,col=1) points(xx[find],yy[find],pch=19,col=2) lines(c(0,1.5),c(0,1.5))