## Analysis of Pear Gene Expression ##Research Leads: Chris Hendrickson/Amit Dhingra ##Code: Mark E. Swanson ##"2015-02-27 10:18:13 PST" ##--------------------Data Import and Cleaning--------------------## dir() #February 2015 data #This data has only 28 levels. #This data has newly created 'phen2' variable with levels H,C, and R #for Harvest, Conditioned, Ripe chris=read.csv("newdata_Feb2015/data_NMDSfinal_02102015.csv",header=T) #Older data with 92 targets. chris.f=read.csv("AllReps_AllTargets_NMDS_formatted.csv",header=T) names(chris)=c("target","var","chill","phen","phen2","biol_rep","cq", "meta") #target = gene (factor) #var = variety (factor) #chill = chilling treatment (factor) #phen = phenology (factor) #phen2 = phenology (factor) #biol_rep = biological replicate (integer code; not needed) #cq = expression measurement (float number) #meta = quality of data (factor) dim(chris) chris$biol_rep=as.factor(chris$biol_rep) #Identify 'NA' values #chris[apply( chris , 1 , function(x) any(is.na(x)) ),] #Replace with assumed maximal number of cycles, ==51 #chris$exp[apply( chris , 1 , function(x) any(is.na(x)) )] <-51 #Review the structure of the database str(chris) #Subset to eliminate variable 'phen' and 'meta'. chris2=chris[,c(1:3,5:7)] ##--------------------#Create block variable--------------------## chris2$bl=as.factor(rep(rep(c(1:4),each=30),28)) ##--------------------#Generalized linear models--------------------## #Removed. ##------------------------ORDINATION--------------------------------## wide <- reshape(chris2, timevar=c("target"), idvar = c("bl","biol_rep","var","chill","phen2"), direction = "wide") dim(wide) write.csv(wide,"chris_wide2.csv") require(vegan) ?metaMDS #Create parsimonious identifier column id= paste( substr(wide$var,1,1), substr(wide$chill,1,1), substr(wide$phen2,1,1), sep="") wide=cbind(id,wide) wide[1:5,c(1:10)] x<-c() for(i in 6:34) x[i]<-substr(names(wide)[i],4,nchar(names(wide)[i])) wide2=wide[,7:34] #[,7:34] #Examine rows with NAs #wide2[ , apply( wide2 , 2 , function(x) any(is.na(x)) ) ] #wide3=wide2[ , ! apply( wide2 , 2 , function(x) any(is.na(x)) ) ] try2=metaMDS(wide2) try2=metaMDS(wide2,previous.best=try1) scores(try1,display=c("sites")) scores(try1,display=c("species")) pear.scores=scores(try1,display=c("sites")) pear.scores=data.frame(id,pear.scores) #bind ##-------------------Graphing the Ordination---------------------## #Plot by id. pear.plot=function(x,y="new",col="black",sym=1) { data1=pear.scores[pear.scores$id==x,] if(y=="add"){ points(data1$NMDS1,data1$NMDS2, xlim=c(-0.15,0.12), ylim=c(-0.10,0.15), col=col, pch=sym, main=NULL)} else { plot(data1$NMDS1,data1$NMDS2, xlim=c(-0.15,0.12), ylim=c(-0.10,0.15), col=col, pch=sym, main=NULL)} hull1=chull(data1$NMDS1,data1$NMDS2) hull1=c(hull1,hull1[1]) hull2=cbind(data1$NMDS1[hull1],data1$NMDS2[hull1]) lines(hull2,col=col) } levels(pear.scores$id) #"ACC" "ACH" "ACR" "ANC" "ANR" "BCC" "BCH" "BCR" "BNC" "BNR" pear.plot("ACH") #black for harvest pear.plot("ACC",y="add",col="green") #green for conditioned pear.plot("ACR",y="add",col="red") #red for ripe pear.plot("ANC",y="add",col="blue") pear.plot("ANR",y="add",col="yellow") legend(-0.15,0.03,legend=c("ACH","ACC","ACR","ANC","ANR"), col=c("black","green","red","blue","yellow"), lty=1,cex=0.7) pear.plot("BCH",y="add",sym=3) pear.plot("BCC",y="add",col="green",sym=3) pear.plot("BCR",y="add",col="red",sym=3) pear.plot("BNC",y="add",col="blue",sym=3) pear.plot("BNR",y="add",col="yellow",sym=3) legend(-0.15,0.0,legend=c("BCH","BCC","BCR","BNC","BNR"), col=c("black","green","red","blue","yellow"), lty=1,cex=0.7) #Legend for variety comparison 1: ACH, ACC, BCH, BCC legend(-0.2,0.14,legend=c("ACH","ACC","BCH","BCC"), col=c("black","black","green","green"), pch=c(1,3,1,3),cex=0.7) #Legend for variety comparison 2: ACR, ANC, ANR, BCR, BNC, BNR legend(-0.2,0.14,legend=c("ACR","ANC","ANR","BCR","BNC","BNR"), col=c("red","blue","yellow","red","blue","yellow"), pch=c(1,1,1,3,3,3),cex=0.7) #Plot gene target arrows. gene.plot=read.delim("clipboard") x<-c() for(i in 1:16) x[i]<-substr(gene.plot$Target[i],4, nchar(as.character((gene.plot$Target)))) gene.plot$Target=as.factor(x); rm(x) plot(NULL,NULL, xlim=c(-0.05,0.05), ylim=c(-0.05,0.05), xlab="NMDS-1",ylab="NMDS-2", main=NULL) points(0,0,pch=3,col="red") abline(v=0,col="red",lty=3); abline(h=0,col="red",lty=3) for (i in 1:16){ lines(c(0,gene.plot$NMDS1[i]),c(0,gene.plot$NMDS2[i]),lty=3) text(gene.plot$NMDS1[i], gene.plot$NMDS2[i],gene.plot$Target[i],cex=0.8, font=2) } text(-0.032,0.035,"RAN1",cex=0.8,font=2) arrows(-0.034,0.041,-0.039,0.047,length=0.1) ###-----------------Centroid and Traces----------------------### jpeg(filename = "LineTrace.jpg", width = 600, height = 480/1.618, units = "px", pointsize = 12, quality = 100,res=400) centroids=aggregate(pear.scores[,2:3],list(pear.scores$id),mean) plot(NULL,NULL, xlim=c(-0.05,0.07), ylim=c(-0.05,0.05), xlab="NMDS-1",ylab="NMDS-2", main=NULL) text(centroids$NMDS1,centroids$NMDS2,centroids$Group.1,cex=1,font=2, col=c(rep("black",5),rep("green3",5))) lines(centroids$NMDS1[c(2,1,3)],centroids$NMDS2[c(2,1,3)],lty=1) lines(centroids$NMDS1[c(2,4,5)],centroids$NMDS2[c(2,4,5)],lty=2) lines(centroids$NMDS1[c(7,6,8)],centroids$NMDS2[c(7,6,8)],lty=1, col="green3") lines(centroids$NMDS1[c(7,9,10)],centroids$NMDS2[c(7,9,10)],lty=2, col="green3") dev.off() #Extraction of gene NMS scores gene.scores=scores(try1,display=c("species")) gene.scores=data.frame(row.names(gene.scores),as.matrix(gene.scores)) row.names(gene.scores)<-NULL names(gene.scores)=c("gene","NMS1","NMS2") gene.scores$gene2=substr(gene.scores$gene,8,15) gene.scores$gene<-NULL gene.scores$dist=sqrt(gene.scores$NMS1^2+gene.scores$NMS2^2) head(gene.scores) write.csv(gene.scores,"chris_gene_scores.csv") #Plotting of scores plot(try1, display = c("sites"), choices = c(1), type = "n") points(try1, display = c("species"), choices = c(2), type = "p") names(try1) plot(try1, display = c("sites"), type="n") text(pear.scores[,1],pear.scores[,2],wide$id,cex=0.7) points(try1, display = c("species"), type="p") text(try1, display = c("sites")) ##Stress Plot for First and Second Runs str(try1) stress=read.delim("clipboard") str(stress) plot(stress$run,stress$Initial,type="both",xlab="Ordination Run", ylab="Stress",ylim=c(0,0.5)) points(stress$run,stress$Second,type="both",pch=2) legend() ##----------------Goodness of Fit of Ordination------------------## stressplot(try1) #shows a non-metric fit R2 of 0.968 #Shows a linear fit R2 of 0.894 ##ANOVA for 35 genes levels(chris2$target) sink("chris_anova.txt") for (i in as.character(levels(chris2$target))){ cat("*****-------- Gene Target:",i, "-------******\n\n") summary(lm(cq~var*chill*phen, data=chris2[chris2$target==i,])) print(TukeyHSD(aov(cq~var*chill*phen, data=chris2[chris2$target==i,]))) } sink() ?cat summary(anova(lm(cq~var*chill*phen,data=chris2[chris2$target=="MTAN",]))) ##----------------FIRMNESS---------------------------## firm=read.csv("firm.csv",head=T) firm$phen2=as.Date(firm$phen,"%m/%d/%Y") firmc=firm[firm$chill=="C",] plot(firm$phen2,firm$firmness,pch=as.numeric(firm$condTemp), col=as.numeric(firm$chill)) interaction.plot(firmc$) ###-------------------------------------------------------------------### ###-------------------------------------------------------------------### ###-------------------------------------------------------------------### ###-------------------------------------------------------------------### # Old Data- 93 Targets ---------------------------------------------------- ##--------------------Data Import and Cleaning--------------------## #Older data with 93 targets. #This data also has been given a 'phen2' recoded variable. chris93=read.csv("all93genes/PearGenes_93targets.csv", header=T) chris93$block= #target = gene (factor) #var = variety (factor) #chill = chilling treatment (factor) #phen = phenology (factor) #phen2 = phenology (factor) #biol_rep = biological replicate (integer code; not needed) #cq = expression measurement (float number) #meta = quality of data (factor) dim(chris.f) chris93$rep=as.factor(chris93$rep) #Identify 'NA' values #chris[apply( chris , 1 , function(x) any(is.na(x)) ),] #Replace with assumed maximal number of cycles, ==51 #chris$exp[apply( chris , 1 , function(x) any(is.na(x)) )] <-51 #Review the structure of the database str(chris93) #Subset to eliminate variable 'exp'. chris93.2=chris93[,c(1:3,5:7)] #Subset to eliminate 'NA' rows chris93.2=chris93.2[!is.na(chris93.2$exp),] dim(chris93.2) #should be 2781 rows ##--------------#Create block variable for tech reps---------------## #chris93.2$bl=as.factor(rep(rep(c(1:4),each=30),28)) ##--------------------#Generalized linear models--------------------## #Removed. ##------------------------ORDINATION--------------------------------## wide93 <- reshape(chris93.2, timevar=c("target"), idvar = c("rep","var","chill","phen2"), direction = "wide") dim(wide93) write.csv(wide93,"chris_wide93.csv") require(vegan) ?metaMDS #Create parsimonious identifier column id= paste( substr(wide93$var,1,1), substr(wide93$chill,1,1), substr(wide93$phen2,1,1), sep="") wide93=cbind(id,wide93) write.csv(wide93,"all93genes/wide93.csv") wide93[1:5,c(1:10)] wide93=read.csv("all93genes/wide93_edit.csv",header=T) wide93.2=wide93[,6:98] #[,6:98] wide93.2=wide93.2[,-c(77)] try1=metaMDS(wide93.2) try1=metaMDS(wide93.2,previous.best=try1) scores(try1,display=c("sites")) gene.scores=scores(try1,display=c("species")) pear.scores=scores(try1,display=c("sites")) pear.scores=data.frame(id,pear.scores) #bind ##-------------------Graphing the Ordination---------------------## #Plot by id. pear.plot=function(x,y="new",col="black",sym=1) { data1=pear.scores[pear.scores$id==x,] if(y=="add"){ points(data1$NMDS1,data1$NMDS2, xlim=c(-0.15,0.12), ylim=c(-0.10,0.15), col=col, pch=sym, main=NULL)} else { plot(data1$NMDS1,data1$NMDS2, xlim=c(-0.15,0.12), ylim=c(-0.10,0.15), col=col, pch=sym, main=NULL)} hull1=chull(data1$NMDS1,data1$NMDS2) hull1=c(hull1,hull1[1]) hull2=cbind(data1$NMDS1[hull1],data1$NMDS2[hull1]) lines(hull2,col=col) } levels(pear.scores$id) #"ACC" "ACH" "ACR" "ANC" "ANR" "BCC" "BCH" "BCR" "BNC" "BNR" pear.plot("ACH") #black for harvest pear.plot("ACC",y="add",col="green") #green for conditioned pear.plot("ACR",y="add",col="red") #red for ripe pear.plot("ANC",y="add",col="blue") pear.plot("ANR",y="add",col="yellow") legend(-0.15,0.03,legend=c("ACH","ACC","ACR","ANC","ANR"), col=c("black","green","red","blue","yellow"), lty=1,cex=0.7) pear.plot("BCH",y="add",sym=3) pear.plot("BCC",y="add",col="green",sym=3) pear.plot("BCR",y="add",col="red",sym=3) pear.plot("BNC",y="add",col="blue",sym=3) pear.plot("BNR",y="add",col="yellow",sym=3) legend(-0.15,0.0,legend=c("BCH","BCC","BCR","BNC","BNR"), col=c("black","green","red","blue","yellow"), lty=1,cex=0.7) #Legend for variety comparison 1: ACH, ACC, BCH, BCC legend(-0.2,0.14,legend=c("ACH","ACC","BCH","BCC"), col=c("black","black","green","green"), pch=c(1,3,1,3),cex=0.7) #Legend for variety comparison 2: ACR, ANC, ANR, BCR, BNC, BNR legend(-0.2,0.14,legend=c("ACR","ANC","ANR","BCR","BNC","BNR"), col=c("red","blue","yellow","red","blue","yellow"), pch=c(1,1,1,3,3,3),cex=0.7) #Plot gene target arrows. gene.plot=read.delim("clipboard") gene.plot=data.frame(gene.plot[gene.plot$include==1,]) plot(NULL,NULL, xlim=c(-0.05,0.05), ylim=c(-0.05,0.08), xlab="NMDS-1",ylab="NMDS-2", main=NULL) points(0,0,pch=3,col="red") for (i in 1:16){ lines(c(0,gene.plot$NMDS1[i]),c(0,gene.plot$NMDS2[i]),lty=3) text(gene.plot$NMDS1[i], gene.plot$NMDS2[i],gene.plot$Target[i],cex=0.7, font=2) } text(-0.045,0.06,"RTE1",cex=0.7,font=2) arrows(-0.045,0.064,-0.05,0.07,length=0.1) text(0.032,-0.035,"RAN1",cex=0.7,font=2) arrows(0.034,-0.041,0.039,-0.051,length=0.1) ###-----------------Centroid and Traces----------------------### centroids=aggregate(pear.scores[,2:3],list(pear.scores$id),mean) plot(NULL,NULL, xlim=c(-0.05,0.07), ylim=c(-0.05,0.05), xlab="NMDS-1",ylab="NMDS-2", main=NULL) text(centroids$NMDS1,centroids$NMDS2,centroids$Group.1,cex=1,font=2, col=c(rep("black",5),rep("green3",5))) lines(centroids$NMDS1[c(2,1,3)],centroids$NMDS2[c(2,1,3)],lty=1) lines(centroids$NMDS1[c(2,4,5)],centroids$NMDS2[c(2,4,5)],lty=2) lines(centroids$NMDS1[c(7,6,8)],centroids$NMDS2[c(7,6,8)],lty=1, col="green3") lines(centroids$NMDS1[c(7,9,10)],centroids$NMDS2[c(7,9,10)],lty=2, col="green3") #Extraction of gene NMS scores gene.scores=scores(try1,display=c("species")) gene.scores=data.frame(row.names(gene.scores),as.matrix(gene.scores)) row.names(gene.scores)<-NULL names(gene.scores)=c("gene","NMS1","NMS2") gene.scores$gene2=substr(gene.scores$gene,8,15) gene.scores$gene<-NULL gene.scores$dist=sqrt(gene.scores$NMS1^2+gene.scores$NMS2^2) head(gene.scores) write.csv(gene.scores,"chris_gene_scores.csv") #Plotting of scores plot(try1, display = c("sites"), choices = c(1), type = "n") points(try1, display = c("species"), choices = c(2), type = "p") names(try1) plot(try1, display = c("sites"), type="n") text(pear.scores[,1],pear.scores[,2],wide$id,cex=0.7) points(try1, display = c("species"), type="p") text(try1, display = c("sites")) ##Stress Plot for First and Second Runs stress=read.delim("clipboard") str(stress) plot(stress$run,stress$Initial,type="both",xlab="Ordination Run", ylab="Stress",ylim=c(0,0.5)) points(stress$run,stress$Second,type="both",pch=2) legend() ##----------------Goodness of Fit of Ordination------------------## stressplot(try1) #shows a non-metric fit R2 of 0.968 #Shows a linear fit R2 of 0.894 ##ANOVA for 35 genes levels(chris2$target) sink("chris_anova.txt") for (i in as.character(levels(chris2$target))){ cat("*****-------- Gene Target:",i, "-------******\n\n") summary(lm(cq~var*chill*phen, data=chris2[chris2$target==i,])) print(TukeyHSD(aov(cq~var*chill*phen, data=chris2[chris2$target==i,]))) } sink() ?cat summary(anova(lm(cq~var*chill*phen,data=chris2[chris2$target=="MTAN",]))) ##----------------FIRMNESS---------------------------## firm=read.csv("firm.csv",head=T) firm$phen2=as.Date(firm$phen,"%m/%d/%Y") firmc=firm[firm$chill=="C",] plot(firm$phen2,firm$firmness,pch=as.numeric(firm$condTemp), col=as.numeric(firm$chill)) interaction.plot(firmc$)